예제 #1
0
파일: jsonutil.py 프로젝트: BeauJoh/phd
def read_file(*components, **kwargs):
    """Load a JSON data blob from a file.

    Arguments:
        *components (str): Path components, joined to form the file path.
        must_exist (bool, optional): If False, return an empty dict if the
            file does not exist. Defaults to True.

    Returns:
        array or dict: Decoded JSON data.

    Raises:
        File404: If path does not exist, and must_exist is True.
        ValueError: If the JSON is malformed.
        IOError: If the file cannot be read and must_exist is True.
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        # Chain the original parser error so the traceback stays useful.
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e))) from e
    except IOError:
        if not must_exist:
            # Best-effort mode: a missing/unreadable file yields empty data.
            return {}
        # BUG FIX: previously the exception object was *returned*, silently
        # handing callers an IOError instance instead of data. Re-raise.
        raise
예제 #2
0
파일: benchmark.py 프로젝트: BeauJoh/phd
def main():
    """Benchmark entry point: load a model from JSON and evaluate sampling."""
    import sys

    log.init(verbose=True)

    mdl = model.from_json(clgen.load_json_file(sys.argv[1]))
    smp = sampler.from_json({
        "kernels": {
            "args": [
                "__global float*",
                "__global float*",
                "__global float*",
                "const int",
            ],
            "max_length": 5000,
            "temperature": 1,
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        },
    })

    print("Corpus size:", mdl.corpus.size)
    print("Vocab size: ", mdl.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    # Write the evaluation report next to the cwd, named after the model file.
    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    clgen.write_file(outpath, clgen.format_json(evaluate(mdl, smp)))
예제 #3
0
    def _msg(i, x):
        """Format one stack-frame tuple as '      #N  file:line  fn()'.

        Arguments:
            i (int): Zero-based frame index; displayed one-based.
            x (tuple): Frame record of (path, lineno, function name, ...).

        Returns:
            str: A single formatted backtrace line.
        """
        n = i + 1

        filename = fs.basename(x[0])
        lineno = x[1]
        fnname = x[2]

        # BUG FIX: `filename` was computed but never used — the location was
        # left as the "(unknown)" placeholder. Show the actual file name.
        loc = "{filename}:{lineno}".format(filename=filename, lineno=lineno)
        return "      #{n}  {loc: <18} {fnname}()".format(
            n=n, loc=loc, fnname=fnname)
예제 #4
0
파일: db.py 프로젝트: BeauJoh/phd
    def from_bin(path: Path = "gslang",
                 session: session_t = None) -> List['Testbed']:
        """Look up (or create) the Testbed record for the binary at `path`.

        Arguments:
            path: Path to the binary; its basename names the platform.
            session: Optional existing database session to reuse.

        Returns:
            A single-element list holding the Testbed row.
        """
        import cldrive

        with ReuseSession(session) as sess:
            platform_row = get_or_add(
                sess,
                Platform,
                platform=fs.basename(path),
                version=Testbed._get_version(path),
                host=cldrive.host_os())
            sess.flush()  # populate platform_row.id before referencing it
            testbed = get_or_add(
                sess,
                Testbed,
                platform_id=platform_row.id,
                optimizations=True)
            return [testbed]
예제 #5
0
def main():
    """Train a model, then sample kernels for the 20 most common prototypes."""
    log.init(verbose=True)

    mdl = model.from_json(clgen.load_json_file(sys.argv[1]))
    corp = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", corp.size)
    print("Vocab size: ", corp.vocab_size)

    mdl.train()

    prototypes, _ = corpus.most_common_prototypes(corp, 20)
    for idx, row in enumerate(prototypes):
        outpath = "./inference-p" + str(idx + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            # Resume support: skip prototypes we already have results for.
            print("skipped result for", outpath)
            continue
        print("starting result for", outpath)

        _, prototype = row
        # Drop each argument's trailing identifier, keeping only its type.
        argspec = [' '.join(arg.split()[:-1]) for arg in prototype.split(',')]
        print("argspec", ','.join(str(arg) for arg in argspec))
        smp = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000,
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False,
            },
        })

        clgen.write_file(outpath, clgen.format_json(evaluate(mdl, smp)))
예제 #6
0
def load_data_desc(platform,
                   source="B",
                   max_seq_len=1000,
                   atomizer=CharacterAtomizer,
                   quiet=False):
    """Load experimental results and encoded kernel sources for a platform.

    Arguments:
        platform (str): Platform name, used to locate the runtime CSV files
            under "runtimes/".
        source (str, optional): Which data to load: "B" benchmarks,
            "S" synthetics, "BS" both, "N" NPB benchmarks only,
            "NS" NPB benchmarks plus synthetics.
        max_seq_len (int, optional): Upper bound on padded sequence length.
        atomizer (class, optional): Atomizer class used to encode sources.
        quiet (bool, optional): If True, suppress progress printing.

    Returns:
        dict: With keys "dataframe" (annotated results), "seq_length"
            (padded sequence length) and "atomizer" (fitted atomizer).

    Raises:
        ValueError: If `source` is not a recognized code, or a kernel file
            does not start with the expected "__kernel void A" prefix.
    """
    def get_benchmarks(platform):
        # Hand-written benchmark runtimes, labeled by suite.
        B = pd.read_csv(
            fs.path("runtimes/{platform}-benchmarks.csv".format(
                platform=platform)))
        B["source"] = [escape_suite_name(x) for x in B["benchmark"]]
        B["synthetic"] = [0] * len(B)
        return B

    def get_npb_benchmarks(platform):
        # Subset of the benchmarks belonging to the NPB suite.
        B = get_benchmarks(platform)
        msk = B["source"] == "NPB"
        return B[msk]

    def get_synthetics(platform):
        # CLgen-synthesized kernel runtimes.
        S = pd.read_csv(
            fs.path("runtimes/{platform}-clgen.csv".format(platform=platform)))
        S["source"] = ["CLgen"] * len(S)
        S["synthetic"] = [1] * len(S)
        return S

    if source == "B":
        dataframe = get_benchmarks(platform)
    elif source == "S":
        dataframe = get_synthetics(platform)
    elif source == "BS":
        dataframe = pd.concat(
            (get_benchmarks(platform), get_synthetics(platform)))
    elif source == "N":
        dataframe = get_npb_benchmarks(platform)
    elif source == "NS":
        dataframe = pd.concat(
            (get_npb_benchmarks(platform), get_synthetics(platform)))
    else:
        # BUG FIX: was a bare `raise Exception` with no message.
        raise ValueError("unrecognized source {!r}".format(source))

    # Binary-encode the oracle label: GPU -> 1, otherwise 0.
    dataframe["oracle_enc"] = [
        1 if x == "GPU" else 0 for x in dataframe["oracle"].values
    ]
    dataframe["benchmark_name"] = [
        escape_benchmark_name(b) for b in dataframe["benchmark"].values
    ]

    # Load source code. (BUG FIX: removed the unused `benchmark_names`
    # accumulator that was declared but never filled or read.)
    source_dir = fs.path("kernels")
    srcs = []
    for row in dataframe["benchmark"].values:
        inpath = fs.path(source_dir, row + ".cl")
        with open(inpath) as infile:
            src = infile.read()
        if not src.startswith("__kernel void A"):
            print(fs.basename(inpath))
            raise ValueError("unexpected kernel source: " + src)
        srcs.append(src)
    dataframe["src"] = srcs
    dataframe["src_len"] = [len(s) for s in srcs]

    if not quiet:
        print("num instances {} ({} synthetic, {} benchmarks)".format(
            len(dataframe), sum(dataframe["synthetic"].values),
            len(dataframe) - sum(dataframe["synthetic"].values)))
        print("unique kernels", len(set(srcs)))

    # Encode and pad sequences. The atomizer is fit on the concatenation of
    # every kernel source so its vocabulary covers the whole corpus.
    atomizer = atomizer.from_text(''.join(dataframe["src"].values))

    seqs = [atomizer.atomize(seq) for seq in dataframe["src"].values]
    seq_length = min(max(len(s) for s in seqs), max_seq_len)
    pad_val = atomizer.vocab_size + 1  # one past the largest real token id
    dataframe["seq_len"] = [len(s) for s in seqs]
    dataframe["seq"] = list(
        pad_sequences(seqs, maxlen=seq_length, value=pad_val))

    if not quiet:
        print("vocab size", atomizer.vocab_size + 1)
        print("pad val", pad_val)
        print("padded seq length", seq_length)

    return {
        "dataframe": dataframe,
        "seq_length": seq_length,
        "atomizer": atomizer
    }
예제 #7
0
                        type=str,
                        default="cc1",
                        help="MySQL database hostname")
    args = parser.parse_args()

    db.init(args.hostname)

    with Session(commit=False) as s:
        # Export results
        #
        print("Exporting CLgen results ...")
        fs.mkdir("export/clgen/result")

        # Pick up where we left off
        done = set([
            int(fs.basename(path))
            for path in Path("export/clgen/result").iterdir()
        ])
        print(len(done), "done")
        ids = set([x[0] for x in s.query(CLgenResult.id).all()])
        print(len(ids), "in total")
        todo = ids - done
        print(len(todo), "todo")

        for result_id in ProgressBar()(todo):
            result = s.query(CLgenResult).filter(
                CLgenResult.id == result_id).scalar()

            with open(f"export/clgen/result/{result.id}", "w") as outfile:
                print(json.dumps({
                    "id":
예제 #8
0
def test_basename():
    """basename returns the final component for relative and absolute paths."""
    for candidate in ("foo", fs.abspath("foo")):
        assert fs.basename(candidate) == "foo"
예제 #9
0
def test_must_exist():
    """must_exist echoes existing paths and raises File404 for missing ones."""
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        path = f.name
        # Accepts a whole path or (dirname, basename) components.
        assert fs.must_exist(path) == path
        components = (fs.dirname(path), fs.basename(path))
        assert fs.must_exist(*components) == path
    with pytest.raises(fs.File404):
        fs.must_exist("/not/a/real/path")