Ejemplo n.º 1
0
def main():
    """Run a single sampling benchmark for the model named on the command line.

    The model description is read from the JSON file given as
    ``sys.argv[1]``. A sampler is built for a fixed four-argument kernel
    prototype, the size and vocabulary size of the model's corpus are
    printed, ``clgen.platform_info()`` is called, and the JSON-formatted
    result of ``evaluate`` is written to ``./benchmark-<name>``, where
    ``<name>`` is ``fs.basename`` of the input path.
    """
    import sys

    log.init(verbose=True)

    model_path = sys.argv[1]
    benchmark_model = model.from_json(clgen.load_json_file(model_path))

    # Kernel-level options: the argument types of the kernels to sample,
    # plus the length and temperature settings passed to the sampler.
    kernel_options = {
        "args": [
            "__global float*",
            "__global float*",
            "__global float*",
            "const int",
        ],
        "max_length": 5000,
        "temperature": 1,
    }
    # Sampler-level options: one batch of 1000, both checkers switched off.
    sampler_options = {
        "batch_size": 1000,
        "max_batches": 1,
        "static_checker": False,
        "dynamic_checker": False,
    }
    benchmark_sampler = sampler.from_json({
        "kernels": kernel_options,
        "sampler": sampler_options,
    })

    print("Corpus size:", benchmark_model.corpus.size)
    print("Vocab size: ", benchmark_model.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    # The output path is resolved before the evaluation starts.
    result_path = "./benchmark-" + fs.basename(model_path)
    clgen.write_file(
        result_path,
        clgen.format_json(evaluate(benchmark_model, benchmark_sampler)))
Ejemplo n.º 2
0
def main():
    """Benchmark sampling from the model whose JSON file is ``sys.argv[1]``.

    Steps, in order: initialise verbose logging, load the model, build a
    sampler from a fixed configuration, print the corpus statistics of the
    model, call ``clgen.platform_info()``, run ``evaluate`` on the model and
    sampler, and write the JSON-formatted result to a ``./benchmark-``
    prefixed file named after the input file.
    """
    import sys

    log.init(verbose=True)

    source_json = sys.argv[1]
    loaded = clgen.load_json_file(source_json)
    trained = model.from_json(loaded)

    # Fixed sampling configuration used for every benchmark run.
    config = {
        "kernels": {
            "args": ["__global float*"] * 3 + ["const int"],
            "max_length": 5000,
            "temperature": 1,
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        },
    }
    kernel_sampler = sampler.from_json(config)

    print("Corpus size:", trained.corpus.size)
    print("Vocab size: ", trained.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    destination = "./benchmark-" + fs.basename(source_json)
    results = evaluate(trained, kernel_sampler)
    serialized = clgen.format_json(results)
    clgen.write_file(destination, serialized)
Ejemplo n.º 3
0
def main():
    """Evaluate sampling for the 20 most common prototypes of a corpus.

    Loads the model from the JSON file given as ``sys.argv[1]``, trains it,
    and asks ``corpus.most_common_prototypes`` for 20 prototypes of the
    corpus at ``~/data/github``. For each prototype an argument spec is
    derived, a sampler is built, and the JSON-formatted result of
    ``evaluate`` is written to ``./inference-p<N>-<name>`` (``N`` counts
    from 1). Prototypes whose output file already exists are skipped.
    """
    log.init(verbose=True)

    model_path = sys.argv[1]
    m = model.from_json(clgen.load_json_file(model_path))
    github_corpus = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", github_corpus.size)
    print("Vocab size: ", github_corpus.vocab_size)

    m.train()

    prototypes, _ = corpus.most_common_prototypes(github_corpus, 20)
    for rank, entry in enumerate(prototypes, start=1):
        outpath = "./inference-p" + str(rank) + "-" + fs.basename(model_path)
        if fs.exists(outpath):
            # A result for this prototype is already on disk.
            continue

        _, prototype = entry

        # Drop the last whitespace-separated token of every
        # comma-separated parameter (presumably the parameter name,
        # leaving only its type -- confirm against the prototype format).
        argspec = []
        for param in prototype.split(','):
            tokens = param.split()
            argspec.append(' '.join(tokens[:-1]))
        print("argspec", ','.join(map(str, argspec)))

        kernel_options = {
            "args": argspec,
            "max_length": 5000,
        }
        sampler_options = {
            "batch_size": 2000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        }
        s = sampler.from_json({
            "kernels": kernel_options,
            "sampler": sampler_options,
        })

        clgen.write_file(outpath, clgen.format_json(evaluate(m, s)))
Ejemplo n.º 4
0
    def test_sample(self):
        """Sample a kernel twice and check the row counts in the sample cache.

        After the cache is emptied, the first ``sample`` call must leave
        exactly one row in each of the ``ContentFiles`` and
        ``PreProcessedFiles`` tables of ``kernels.db``. After the second
        call each table must hold at least one row.
        """
        m = get_test_model()
        m.train()

        argspec = [
            '__global float*', '__global float*', '__global float*',
            'const int'
        ]
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 300,
            },
            "sampler": {
                "batch_size": 1,
                "max_batches": 1
            }
        })

        def count_rows(table):
            # Look the cache up afresh on every call, as each individual
            # count in this test has always done.
            return dbutil.num_rows_in(s.cache(m)["kernels.db"], table)

        s.cache(m).empty()  # clear old samples

        # sample a single kernel:
        s.sample(m)
        num_contentfiles = count_rows("ContentFiles")
        num_preprocessed = count_rows("PreProcessedFiles")
        self.assertEqual(num_contentfiles, 1)
        self.assertEqual(num_preprocessed, 1)

        s.sample(m)
        num_contentfiles = count_rows("ContentFiles")
        num_preprocessed = count_rows("PreProcessedFiles")
        # if sample is the same as previous, then there will still only be a
        # single sample in db, so only a lower bound can be asserted.
        # assertGreaterEqual reports the actual count on failure, unlike
        # assertTrue on a comparison.
        self.assertGreaterEqual(num_contentfiles, 1)
        self.assertGreaterEqual(num_preprocessed, 1)
Ejemplo n.º 5
0
def main():
    """Evaluate sampling per prototype, reporting progress for each result.

    Loads the model from the JSON file given as ``sys.argv[1]``, trains it,
    and iterates over the 20 prototypes returned by
    ``corpus.most_common_prototypes`` for the corpus at ``~/data/github``.
    Each prototype's result is written to ``./inference-p<N>-<name>``
    (``N`` counts from 1). A line is printed saying whether the result was
    skipped because the file already exists, or is being started.
    """
    log.init(verbose=True)

    config_path = sys.argv[1]
    m = model.from_json(clgen.load_json_file(config_path))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    common, _ = corpus.most_common_prototypes(c, 20)
    for number, row in enumerate(common, start=1):
        outpath = "./inference-p" + str(number) + "-" + fs.basename(config_path)

        # Guard clause: never recompute a result that is already on disk.
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        print("starting result for", outpath)

        _, prototype = row
        # For each comma-separated parameter keep everything except its
        # last whitespace-separated token.
        params = prototype.split(',')
        argspec = [' '.join(param.split()[:-1]) for param in params]
        print("argspec", ','.join(map(str, argspec)))

        sampler_json = {
            "kernels": {
                "args": argspec,
                "max_length": 5000,
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False,
            },
        }
        s = sampler.from_json(sampler_json)

        info = evaluate(m, s)
        formatted = clgen.format_json(info)
        clgen.write_file(outpath, formatted)