# Imports for the functions in this section. Module layout is assumed from
# the surrounding project: clgen provides the log, model, sampler, corpus
# and dbutil submodules, and fs is labm8's filesystem helper module.
import sys

import clgen
from clgen import corpus
from clgen import dbutil
from clgen import log
from clgen import model
from clgen import sampler
from labm8 import fs


def main():
    log.init(verbose=True)

    # load the model config given on the command line:
    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    s = sampler.from_json({
        "kernels": {
            "args": [
                "__global float*",
                "__global float*",
                "__global float*",
                "const int"
            ],
            "max_length": 5000,
            "temperature": 1
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False
        }
    })

    print("Corpus size:", m.corpus.size)
    print("Vocab size: ", m.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    info = evaluate(m, s)
    clgen.write_file(outpath, clgen.format_json(info))
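# The evaluate() helper called above is not shown in this section and is
# assumed to be defined elsewhere in the file. The sketch below is a
# hypothetical minimal version built only from calls that appear in this
# section (s.sample, s.cache, dbutil.num_rows_in); the real helper likely
# collects more detailed statistics.
def evaluate(m, s):
    """Hypothetical sketch: sample from model `m` with sampler `s`, then
    report simple row counts from the sample cache."""
    s.sample(m)
    db = s.cache(m)["kernels.db"]
    return {
        "num_contentfiles": dbutil.num_rows_in(db, "ContentFiles"),
        "num_preprocessed": dbutil.num_rows_in(db, "PreProcessedFiles"),
    }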
def test_sample(self):
    m = get_test_model()
    m.train()

    argspec = [
        '__global float*',
        '__global float*',
        '__global float*',
        'const int'
    ]
    s = sampler.from_json({
        "kernels": {
            "args": argspec,
            "max_length": 300,
        },
        "sampler": {
            "batch_size": 1,
            "max_batches": 1
        }
    })

    s.cache(m).empty()  # clear old samples

    # sample a single kernel:
    s.sample(m)
    num_contentfiles = dbutil.num_rows_in(
        s.cache(m)["kernels.db"], "ContentFiles")
    num_preprocessed = dbutil.num_rows_in(
        s.cache(m)["kernels.db"], "PreProcessedFiles")
    self.assertEqual(num_contentfiles, 1)
    self.assertEqual(num_preprocessed, 1)

    s.sample(m)
    num_contentfiles = dbutil.num_rows_in(
        s.cache(m)["kernels.db"], "ContentFiles")
    num_preprocessed = dbutil.num_rows_in(
        s.cache(m)["kernels.db"], "PreProcessedFiles")
    # if the new sample is identical to the previous one, there will still
    # be only a single sample in the db:
    self.assertTrue(num_contentfiles >= 1)
    self.assertTrue(num_preprocessed >= 1)
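# get_test_model() is a test fixture assumed to be defined elsewhere. A
# hypothetical sketch, reusing only constructors that appear in this section
# (model.from_json, clgen.load_json_file); the fixture path is invented for
# illustration and is not part of the original code:
def get_test_model():
    # hypothetical path to a small checked-in test model config:
    return model.from_json(clgen.load_json_file("tests/data/model.json"))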
def main():
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})

    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        else:
            print("starting result for", outpath)

        _, prototype = row
        # strip the argument name from each "type name" pair in the prototype:
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))

        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
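# Each main() above reads its model config path from sys.argv[1], so the
# scripts are meant to be run directly. The usual entry-point guard (assumed;
# not shown in the original) would close each script:
if __name__ == "__main__":
    main()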