def test_to_json():
    """A corpus rebuilt from its own ``to_json()`` output equals the original."""
    spec = {
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
    }
    original = clgen.Corpus.from_json(spec)
    # Round-trip: serialise, then construct a second corpus from the result.
    restored = clgen.Corpus.from_json(original.to_json())
    assert original == restored
def test_bad_vocab():
    """An unrecognised ``vocab`` value is rejected with ``clgen.UserError``."""
    with pytest.raises(clgen.UserError):
        spec = {
            "language": "opencl",
            "path": tests.archive("tiny", "corpus"),
            "vocab": "INVALID_VOCAB",
        }
        clgen.Corpus.from_json(spec)
def test_bad_encoding():
    """An unrecognised ``encoding`` value is rejected with ``clgen.UserError``."""
    with pytest.raises(clgen.UserError):
        spec = {
            "language": "opencl",
            "path": tests.archive("tiny", "corpus"),
            "encoding": "INVALID_ENCODING",
        }
        clgen.Corpus.from_json(spec)
def test_cli():
    """Drive the ``db``, ``fetch`` and ``preprocess`` CLI commands end to end.

    The test creates ``kernels.db``, ``kernels_out`` and ``kernels.cl`` in the
    current working directory. All three are removed in a ``finally`` clause so
    that a failing assertion does not leave artifacts behind for later runs.
    """
    artifacts = ("kernels.db", "kernels_out", "kernels.cl")

    # Remove any stale database left behind by an earlier, interrupted run.
    # NOTE(review): this relies on fs.rm() tolerating a missing path, as the
    # original test already did by calling it before the files existed.
    fs.rm("kernels.db")
    try:
        cli.main("db init kernels.db".split())
        assert fs.exists("kernels.db")

        corpus_path = tests.archive("tiny", "corpus")
        cli.main("db explore kernels.db".split())
        # Pass the path as its own argv element: interpolating it into a
        # string and splitting on whitespace would break a path with spaces.
        cli.main(["fetch", "fs", "kernels.db", corpus_path])
        cli.main("preprocess kernels.db".split())
        cli.main("db explore kernels.db".split())

        # Dump to a directory.
        fs.rm("kernels_out")
        cli.main("db dump kernels.db -d kernels_out".split())
        assert fs.isdir("kernels_out")
        assert len(fs.ls("kernels_out")) >= 1

        # Dump to a single file.
        fs.rm("kernels.cl")
        cli.main(
            "db dump kernels.db kernels.cl --file-sep --eof --reverse".split())
        assert fs.isfile("kernels.cl")

        # Dump the input samples to a directory.
        fs.rm("kernels_out")
        cli.main("db dump kernels.db --input-samples -d kernels_out".split())
        assert fs.isdir("kernels_out")
        assert len(fs.ls("kernels_out")) == 250
    finally:
        # Clean up everything the test created, including kernels.cl, which
        # the original version never removed.
        for artifact in artifacts:
            fs.rm(artifact)
def test_bad_option():
    """An unknown top-level option is rejected with ``clgen.UserError``."""
    with pytest.raises(clgen.UserError):
        spec = {
            "language": "opencl",
            "path": tests.archive("tiny", "corpus"),
            "not_a_real_option": False,
        }
        clgen.Corpus.from_json(spec)
def test_path():
    """A corpus given both ``id`` and ``path`` reports ``TINY_HASH`` as its hash."""
    corpus_dir = tests.archive("tiny", "corpus")
    spec = {
        "language": "opencl",
        "id": TINY_HASH,
        "path": corpus_dir,
    }
    corpus = clgen.Corpus.from_json(spec)
    assert TINY_HASH == corpus.hash
def test_preprocessed():
    """Check how many items ``preprocessed()`` yields for the tiny corpus.

    The expected counts are 187 with no argument, 56 for argument ``1`` and
    7 for argument ``2``.
    NOTE(review): the meaning of the integer argument is not visible from
    this test; confirm against ``Corpus.preprocessed``.
    """
    spec = {
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
    }
    corpus = clgen.Corpus.from_json(spec)

    default_items = list(corpus.preprocessed())
    assert len(default_items) == 187

    status_one_items = list(corpus.preprocessed(1))
    assert len(status_one_items) == 56

    status_two_items = list(corpus.preprocessed(2))
    assert len(status_two_items) == 7
def test_hash():
    """Corpus hashes ignore explicitly-stated defaults but track real changes.

    Every config names a ``language`` because ``test_no_language`` asserts
    that omitting it raises ``clgen.UserError``; without it this test would
    fail during construction, before any hash was compared.
    """
    c1 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
    })

    # same as c1, with explicit default opt:
    c2 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
        "eof": False,
    })

    # different opt value:
    c3 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
        "eof": True,
    })

    assert c1.hash == c2.hash
    assert c2.hash != c3.hash
def test_eq():
    """Corpora compare equal only when built from equal options.

    Every config names a ``language`` because ``test_no_language`` asserts
    that omitting it raises ``clgen.UserError``; without it this test would
    fail during construction, before any comparison was made.
    """
    c1 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
        "eof": False,
    })
    c2 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
        "eof": False,
    })
    c3 = clgen.Corpus.from_json({
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
        "eof": True,
    })

    assert c1 == c2
    assert c2 != c3
    # Comparison against an unrelated type must report "not equal".
    assert c1 != 'abcdef'
def test_contentfiles():
    """``contentfiles()`` yields 250 items for the tiny corpus."""
    spec = {
        "language": "opencl",
        "path": tests.archive("tiny", "corpus"),
    }
    corpus = clgen.Corpus.from_json(spec)
    files = list(corpus.contentfiles())
    assert len(files) == 250
def test_bad_language():
    """An unrecognised ``language`` value is rejected with ``clgen.UserError``."""
    with pytest.raises(clgen.UserError):
        spec = {
            "language": "NOTALANG",
            "path": tests.archive("tiny", "corpus"),
        }
        clgen.Corpus.from_json(spec)
def test_no_language():
    """Omitting ``language`` from the config raises ``clgen.UserError``."""
    with pytest.raises(clgen.UserError):
        spec = {
            "path": tests.archive("tiny", "corpus"),
        }
        clgen.Corpus.from_json(spec)
def test_explore_gh():
    """``clgen.explore`` runs on the ``tiny-gh.db`` archive without raising."""
    database = tests.archive("tiny-gh.db")
    # Guard against a missing fixture before handing the path to explore().
    assert fs.exists(database)
    clgen.explore(database)