def evaluate(model, sampler):
    """
    Evaluate sampling efficiency.

    Samples kernels from a (pre-trained) model, preprocesses the resulting
    sample database, and returns a dictionary of quality statistics.

    Arguments:
        model: Model to sample from.
        sampler: Sampler which drives kernel generation and owns the
            per-model sample cache.

    Returns:
        dict: Sample counts, discard/ugly rates, character counts and the
            relevant cache directory paths.
    """
    print("starting sampling")
    sampler.sample(model)

    print("preprocessing sample")
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")

    # Guard against an empty sample: the original code raised
    # ZeroDivisionError when sampling produced no kernels at all.
    if num_kernels:
        discard_rate = 1 - (num_good_kernels / num_kernels)
        # NOTE(review): 1 - ugly/total is the fraction of *non*-ugly
        # kernels; confirm the complement is really intended for a metric
        # named "ugly_rate".
        ugly_rate = 1 - (num_ugly_kernels / num_kernels)
    else:
        discard_rate = 0
        ugly_rate = 0

    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db, "PreprocessedFiles",
                               condition="WHERE status=0")

    return {
        "argspec": sampler.kernel_opts["args"],
        "host": system.HOSTNAME,
        "date": time.nowstr(),
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
def evaluate(model, sampler):
    """
    Evaluate training and sampling efficiency.

    Trains the model from a cleared checkpoint cache, samples kernels from
    it (timing both phases), preprocesses the sample database, and returns
    a dictionary of timing and quality statistics.

    Arguments:
        model: Model to train and sample from.
        sampler: Sampler which drives kernel generation and owns the
            per-model sample cache.

    Returns:
        dict: Timings, sample counts, discard/ugly rates, character
            counts, efficiency/throughput and cache directory paths.
    """
    model.cache.empty()  # clear checkpoint cache
    print("starting training")
    tstart = time()  # start timer
    model.train()  # train model
    training_time = time() - tstart

    # clear the sample cache
    sampler.cache(model).empty()

    # sample kernels and time
    print("starting sampling")
    tstart = time()
    sampler.sample(model)
    tend = time()
    elapsed = tend - tstart

    # preprocess sample
    sample_db = sampler.cache(model)["kernels.db"]
    preprocess.preprocess_db(sample_db)

    num_kernels = dbutil.num_rows_in(sample_db, "ContentFiles")
    num_good_kernels = dbutil.num_good_kernels(sample_db)
    num_ugly_kernels = dbutil.num_rows_in(sample_db, "PreprocessedFiles",
                                          "WHERE status=2")

    # Guard against an empty sample: the original code raised
    # ZeroDivisionError when sampling produced no kernels at all.
    if num_kernels:
        discard_rate = 1 - (num_good_kernels / num_kernels)
        # NOTE(review): 1 - ugly/total is the fraction of *non*-ugly
        # kernels; confirm the complement is really intended for a metric
        # named "ugly_rate".
        ugly_rate = 1 - (num_ugly_kernels / num_kernels)
    else:
        discard_rate = 0
        ugly_rate = 0

    total_charcount = dbutil.cc(sample_db, "ContentFiles")
    good_charcount = dbutil.cc(sample_db, "PreprocessedFiles",
                               condition="WHERE status=0")
    # Same empty-sample guard for the efficiency ratio.
    efficiency = (good_charcount / total_charcount) if total_charcount else 0
    throughput = good_charcount / elapsed  # wall-clock elapsed is > 0

    return {
        "training_time": training_time,
        "sampling_time": elapsed,
        "num_kernels": num_kernels,
        "num_good_kernels": num_good_kernels,
        "discard_rate": discard_rate,
        "ugly_rate": ugly_rate,
        "total_charcount": total_charcount,
        "good_charcount": good_charcount,
        "efficiency": efficiency,  # good_chars / total_chars
        "throughput": throughput,  # good_chars / second
        "corpus_dir": model.corpus.cache.path,
        "model_dir": model.cache.path,
        "sampler_dir": sampler.cache(model).path,
    }
def main(model, sampler, print_file_list=False, print_corpus_dir=False,
         print_model_dir=False, print_sampler_dir=False) -> None:
    """
    Main entry point for clgen.

    Loads model and sampler specifications from JSON files, optionally
    prints cache information and exits, otherwise trains the model and
    samples from it.

    Arguments:
        model (str): Path to model specification file.
        sampler (str): Path to sampler specification file.
        print_file_list (bool, optional): If True, print all cached file
            paths and exit.
        print_corpus_dir (bool, optional): If True, print corpus cache
            path and exit.
        print_model_dir (bool, optional): If True, print model cache path
            and exit.
        print_sampler_dir (bool, optional): If True, print sampler cache
            path and exit.
    """
    import clgen.model
    import clgen.sampler
    # NOTE(review): `log` is not referenced below — possibly imported for
    # its side effects; confirm before removing.
    from clgen import log

    model_json = load_json_file(model)
    model = clgen.model.from_json(model_json)

    sampler_json = load_json_file(sampler)
    sampler = clgen.sampler.from_json(sampler_json)

    # print cache paths
    if print_file_list:
        files = sorted(
            fs.ls(model.corpus.cache.path, abspaths=True, recursive=True) +
            fs.ls(model.cache.path, abspaths=True, recursive=True) +
            fs.ls(sampler.cache(model).path, abspaths=True, recursive=True))
        print('\n'.join(files))
        sys.exit(0)
    elif print_corpus_dir:
        print(model.corpus.cache.path)
        sys.exit(0)
    elif print_model_dir:
        print(model.cache.path)
        sys.exit(0)
    elif print_sampler_dir:
        print(sampler.cache(model).path)
        sys.exit(0)

    model.train()
    sampler.sample(model)
model = clgen.model.from_tar("./paper-synthesizing-benchmarks-model/model.tar.bz2") argspec = ['__global float*', '__global float*', '__global float*', 'const int'] sampler = clgen.sampler.from_json({ "kernels": { "args": argspec, "max_length": 1000 }, "sampler": { "batch_size": 100, "max_kernels": 15 } }) print("Seed text:", clgen.sampler.serialize_argspec(argspec), "\n") sampler.cache(model).empty() sampler.sample(model) db = sampler.cache(model)["kernels.db"] num_good_kernels = clgen.dbutil.num_good_kernels(db) clgen.explore.explore(db) conn = sqlite3.connect(db) print(conn) c = conn.cursor() #c.execute("SELECT * FROM sqlite_master;") c.execute("SELECT * FROM PreprocessedFiles WHERE status=0;") rows = c.fetchall() if not(os.path.isdir("generatedkernels")):