Example #1
def load_and_test(model_desc,
                  platform,
                  source,
                  atomizer="CharacterAtomizer",
                  maxlen=1024,
                  n_splits=10,
                  split_i=0,
                  seed=204):
    np.random.seed(seed)

    name = model_desc["name"]
    inpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.model".format(
        **vars())
    outpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.result".format(
        **vars())

    if fs.exists(outpath):
        return load_result(model_desc,
                           platform,
                           source,
                           n_splits=n_splits,
                           split_i=split_i,
                           atomizer=atomizer,
                           maxlen=maxlen,
                           seed=seed)
    if not fs.exists(inpath):
        return False

    test_fn = model_desc["test_fn"]
    load_fn = model_desc["load_fn"]

    # load training data
    _atomizer = globals().get(atomizer)
    data_desc = load_data_desc(platform=platform,
                               source=source,
                               max_seq_len=maxlen,
                               atomizer=_atomizer,
                               quiet=True)
    train, test = get_training_data(data_desc,
                                    seed=seed,
                                    split_i=split_i,
                                    n_splits=n_splits)

    # load model
    model = load_fn(inpath)
    print("model loaded from", inpath)

    # test model
    predictions = test_fn(model=model, test=test, seed=seed)
    analysis = analyze(predictions, test)
    test.update(analysis)
    test["predictions"] = predictions

    with open(outpath, 'wb') as outfile:
        pickle.dump(test, outfile)
    print("result saved to", outpath)

    return test
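The model and result paths above are built with str.format(**vars()), which interpolates the function's local variables by name. A minimal sketch of the same pattern (the values here are illustrative):

name = "lstm"
platform = "amd"
seed = 204
# format(**vars()) pulls the named values straight from the current scope.
path = "models/{name}/{platform}-{seed}.model".format(**vars())
assert path == "models/lstm/amd-204.model"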
Example #2
def test_mv():
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    assert ["Hello, world!"] == fs.read("/tmp/labm8.tmp")
    # Cleanup any existing file.
    fs.rm("/tmp/labm8.tmp.copy")
    assert not fs.exists("/tmp/labm8.tmp.copy")
    fs.mv("/tmp/labm8.tmp", "/tmp/labm8.tmp.copy")
    assert ["Hello, world!"] == fs.read("/tmp/labm8.tmp.copy")
    assert not fs.exists("/tmp/labm8.tmp")
Example #3
def main():
    parser = ArgumentParser(description=__description__)
    parser.add_argument("classification")
    parser.add_argument("outdir")
    args = parser.parse_args()

    db.init("cc1")
    session = db.make_session()

    program_ids = [
      x[0] for x in session.query(sql.distinct(CLSmithResult.program_id)) \
        .filter(CLSmithResult.classification == args.classification).all()]

    header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

    fs.mkdir(args.outdir)

    for program_id in ProgressBar()(program_ids):
        outpath = fs.path(args.outdir, program_id + ".cl")

        if not fs.exists(outpath):
            program = session.query(CLSmithProgram) \
              .filter(CLSmithProgram.id == program_id).one()

            pre, post = program.src.split('#include "CLSmith.h"')

            inlined = pre + header + post

            with open(outpath, "w") as outfile:
                print(inlined, file=outfile)
Example #4
File: cache.py Project: BeauJoh/phd
    def __init__(self, path, basecache=None):
        """
        Create a new JSON cache.

        Optionally supports populating the cache with values of an
        existing cache.

        Arguments:
            path (str): Path of the JSON file backing this cache.
            basecache (TransientCache, optional): Cache to populate this new
                cache with.
        """

        super(JsonCache, self).__init__()
        self.path = fs.abspath(path)

        if fs.exists(self.path) and fs.read_file(self.path):
            io.debug("Loading cache '{0}'".format(self.path))
            with open(self.path) as file:
                self._data = json.load(file)

        if basecache is not None:
            for key, val in basecache.items():
                self._data[key] = val

        # Register exit handler to write the cache back to disk.
        atexit.register(self.write)
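A minimal usage sketch, assuming the TransientCache base class exposes dict-style access over self._data and that write() serializes it back to self.path (the cache path and key below are illustrative):

cache = JsonCache("/tmp/demo-cache.json")
cache["vocab_size"] = 92
# On normal interpreter exit, the atexit handler calls cache.write(),
# so a later run constructing the same JsonCache sees the saved value.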
Example #5
File: llvm.py Project: BeauJoh/phd
def assert_program_exists(path):
    """
    Assert that a program exists.

    If the given path does not exist or is not a file, raises
    ProgramNotFoundError.
    """
    if not fs.exists(path) or not fs.isfile(path):
        raise ProgramNotFoundError(path)
Example #6
def test_cp_dir():
    fs.rm("/tmp/labm8")
    fs.rm("/tmp/labm8.copy")
    fs.mkdir("/tmp/labm8/foo/bar")
    assert not fs.exists("/tmp/labm8.copy")
    fs.cp("/tmp/labm8/", "/tmp/labm8.copy")
    assert fs.isdir("/tmp/labm8.copy")
    assert fs.isdir("/tmp/labm8.copy/foo")
    assert fs.isdir("/tmp/labm8.copy/foo/bar")
Example #7
def test_cp_overwrite():
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    assert ["Hello, world!"] == fs.read("/tmp/labm8.tmp")
    # Cleanup any existing file.
    fs.rm("/tmp/labm8.tmp.copy")
    assert not fs.exists("/tmp/labm8.tmp.copy")
    fs.cp("/tmp/labm8.tmp", "/tmp/labm8.tmp.copy")
    system.echo("Goodbye, world!", "/tmp/labm8.tmp")
    fs.cp("/tmp/labm8.tmp", "/tmp/labm8.tmp.copy")
    assert fs.read("/tmp/labm8.tmp") == fs.read("/tmp/labm8.tmp.copy")
Example #8
File: cache.py Project: BeauJoh/phd
    def __contains__(self, key):
        """
        Check cache contents.

        Arguments:
            key: Key.

        Returns:
            bool: True if key in cache, else False.
        """
        path = self.keypath(key)
        return fs.exists(path)
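Defining __contains__ is what lets callers test membership with the "in" operator; here membership is simply a file-existence check. A self-contained sketch of the same protocol using only the standard library (the class and key names are hypothetical):

import os

class KeyFileStore:
    """Each key maps to a file under root; membership is a filesystem check."""

    def __init__(self, root):
        self.root = root

    def keypath(self, key):
        return os.path.join(self.root, key)

    def __contains__(self, key):
        return os.path.exists(self.keypath(key))

store = KeyFileStore("/tmp")
print("some-key" in store)  # True only if /tmp/some-key exists.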
Example #9
def test_scp():
    system.echo("Hello, world!", "/tmp/labm8.tmp")
    assert ["Hello, world!"] == fs.read("/tmp/labm8.tmp")
    # Cleanup any existing file.
    fs.rm("/tmp/labm8.tmp.copy")
    assert not fs.exists("/tmp/labm8.tmp.copy")
    # Perform scp.
    system.scp("localhost",
               "/tmp/labm8.tmp",
               "/tmp/labm8.tmp.copy",
               path="lib/labm8/data/test/bin")
    assert fs.read("/tmp/labm8.tmp") == fs.read("/tmp/labm8.tmp.copy")
Example #10
def train_and_save(model_desc,
                   platform,
                   source,
                   atomizer="CharacterAtomizer",
                   maxlen=1024,
                   n_splits=10,
                   split_i=0,
                   seed=204):
    np.random.seed(seed)

    name = model_desc["name"]
    outpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.model".format(
        **vars())
    if not fs.exists(outpath):
        create_fn = model_desc.get("create_model", _nop)
        train_fn = model_desc.get("train_fn", _nop)
        save_fn = model_desc["save_fn"]
        _atomizer = globals().get(atomizer)

        # load training data
        data_desc = load_data_desc(platform=platform,
                                   source=source,
                                   max_seq_len=maxlen,
                                   atomizer=_atomizer)
        train, test = get_training_data(data_desc,
                                        seed=seed,
                                        split_i=split_i,
                                        n_splits=n_splits)

        # create model
        model = create_fn(seed=seed, data_desc=data_desc)

        # train model
        train_fn(model=model,
                 train=train,
                 seed=seed,
                 platform=platform,
                 source=source)

        fs.mkdir("models/{name}".format(**vars()))
        save_fn(outpath, model)
        print("model saved as", outpath)

    # evaluate model
    return load_and_test(model_desc,
                         platform,
                         source,
                         n_splits=n_splits,
                         split_i=split_i,
                         atomizer=atomizer,
                         maxlen=maxlen,
                         seed=seed)
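train_and_save and load_and_test read only six keys from model_desc, so a plausible shape for it can be reconstructed. A sketch with illustrative stand-in functions, not the real models:

import pickle

def _save(path, model):
    with open(path, "wb") as outfile:
        pickle.dump(model, outfile)

def _load(path):
    with open(path, "rb") as infile:
        return pickle.load(infile)

model_desc = {
    "name": "static-baseline",  # used to build the models/<name>/ paths
    "create_model": lambda seed, data_desc: {"seed": seed},
    "train_fn": lambda model, train, seed, platform, source: None,
    "save_fn": _save,
    "load_fn": _load,
    "test_fn": lambda model, test, seed: [0] * len(test["y"]),
}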
Example #11
File: cache.py Project: BeauJoh/phd
    def __delitem__(self, key):
        """
        Delete cached file.

        Arguments:
            key: Key.

        Raises:
            KeyError: If file not in cache.
        """
        path = self.keypath(key)
        if fs.exists(path):
            fs.rm(path)
        else:
            raise KeyError(key)
Example #12
def benchmark_inference(model_desc,
                        platform,
                        source,
                        atomizer="CharacterAtomizer",
                        maxlen=1024,
                        n_splits=10,
                        split_i=0,
                        seed=204,
                        n_runtimes=100):
    np.random.seed(seed)

    name = model_desc["name"]
    inpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.model".format(
        **vars())
    outpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.result".format(
        **vars())

    if not fs.exists(inpath):
        return False

    test_fn = model_desc["test_fn"]
    load_fn = model_desc["load_fn"]

    # load training data
    _atomizer = globals().get(atomizer)
    data_desc = load_data_desc(platform=platform,
                               source=source,
                               max_seq_len=maxlen,
                               atomizer=_atomizer,
                               quiet=True)
    train, test = get_training_data(data_desc,
                                    seed=seed,
                                    split_i=split_i,
                                    n_splits=n_splits)

    # load model
    model = load_fn(inpath)
    print("model loaded from", inpath)

    # test model
    runtimes = []
    for i in range(n_runtimes):
        start = time.time()
        predictions = test_fn(model=model, test=test, seed=seed)
        elapsed = (time.time() - start) / len(test["y"])
        runtimes.append(elapsed)

    return np.array(runtimes)
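Since benchmark_inference returns per-example runtimes in seconds (each run's total prediction time divided by the number of test examples), a natural follow-up is to summarize them. A sketch, with platform and source values chosen only for illustration:

runtimes = benchmark_inference(model_desc, "amd", "github")
if runtimes is not False:
    mean = runtimes.mean()
    # Normal-approximation 95% confidence half-width over the runs.
    ci = 1.96 * runtimes.std() / np.sqrt(len(runtimes))
    print("inference: {:.3g} +/- {:.3g} s/example".format(mean, ci))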
Example #13
def test_LockFile_force_replace_stale():
  """Test that lockfile is replaced if forced."""
  with tempfile.TemporaryDirectory() as d:
    path = pathlib.Path(d) / 'LOCK'
    lock = lockfile.LockFile(path)
    MAX_PROCESSES = 4194303  # OS-dependent. This value is for Linux
    lock.acquire(pid=MAX_PROCESSES + 1)
    assert lock.islocked
    assert not lock.owned_by_self
    with pytest.raises(lockfile.UnableToAcquireLockError):
      lock.acquire()
    lock.acquire(force=True)
    assert lock.islocked
    assert lock.owned_by_self
    lock.release()
    assert not fs.exists(lock.path)
Example #14
File: cache.py Project: BeauJoh/phd
    def __setitem__(self, key, value):
        """
        Emplace file in cache.

        Arguments:
            key: Key.
            value (str): Path of file to insert in cache.

        Raises:
            ValueError: If the file at "value" does not exist.
        """
        if not fs.exists(value):
            raise ValueError(value)

        path = self.keypath(key)
        fs.mkdir(self.path)
        fs.mv(value, path)
Example #15
File: cache.py Project: BeauJoh/phd
    def __getitem__(self, key):
        """
        Get path to file in cache.

        Arguments:
            key: Key.

        Returns:
            str: Path to cache value.

        Raises:
            KeyError: If key not in cache.
        """
        path = self.keypath(key)
        if fs.exists(path):
            return path
        else:
            raise KeyError(key)
Example #16
def load_result(model_desc,
                platform,
                source,
                atomizer="CharacterAtomizer",
                maxlen=1024,
                n_splits=10,
                split_i=0,
                seed=204):
    name = model_desc["name"]
    inpath = "models/{name}/{platform}-{source}-{atomizer}:{maxlen}-{seed}-{n_splits}-{split_i}.result".format(
        **vars())
    if not fs.exists(inpath):
        return False

    with open(inpath, 'rb') as infile:
        result = pickle.load(infile)

    return result
Example #17
def main():
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        else:
            print("starting result for", outpath)

        _, prototype = row
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
Example #18
def search(m, target_code, logpath, start_code=None):
    # resume search
    if fs.exists(logpath):
        log = clgen.load_json_file(logpath)
        print("resuming search of", len(get_steps(log)), "steps")
    else:
        log = []

    steps = get_steps(log)

    if start_code and not len(steps):
        code = start_code
    elif len(steps):
        code = steps[-1]['data']['code']
    else:
        code = get_start_code(m)

    target_features = get_features(target_code)
    features = get_features(code)
    distance = get_distance(target_features, features)

    if get_entries(log, "init"):
        init = get_entries(log, "init")[0]
        assert (init['data']['target_code'] == target_code)
        assert (init['data']['target_features'] == escape_features(
            target_features))

        # load history from log
        code_history = get_code_history(log)
    else:
        # create init entry
        add_to_log(log, {
            "start_code": code,
            "start_features": escape_features(features),
            "target_features": escape_features(target_features),
            "target_code": target_code,
            "distance": distance,
            "model": m.meta
        },
                   name="init")
        write_log(log, logpath)
        code_history = [code]

    # keep track of best
    if len(steps):
        best = steps[-1]['data']['best']
    else:
        best = {"distance": distance, "code": code, "improvement_count": 0}

    # maximum number of mutations before stopping search
    MAX_STEPS = 1000

    for i in range(len(steps), MAX_STEPS):
        print("step", i, "of", MAX_STEPS)
        newcode, mutate_idx, mutate_seed, attempts = get_mutation(m, code)
        try:
            features = get_features(newcode)
            distance = get_distance(target_features, features)
        except ValueError:
            newcode = None

        entry = {"count": i, "attempts": attempts}

        if newcode:
            entry["base_code"] = code
            entry["code"] = newcode
            entry["distance"] = distance
            entry["distance_diff"] = 1 - distance / best["distance"]
            entry["features"] = escape_features(features)
            entry["mutate_idx"] = mutate_idx
            entry["mutate_seed"] = mutate_seed
            code_history.append(code)
        else:
            print("    -> step back")
            # step back
            if len(code_history):
                code = code_history.pop()
            entry["step_back"] = code

        # Only credit an improvement for a valid mutation: if newcode is
        # None, distance is stale and entry["distance_diff"] was never set.
        if newcode and distance < best["distance"]:
            print("    -> improvement {:.1f}%".format(entry["distance_diff"] *
                                                      100))
            best["distance"] = distance
            best["code"] = newcode
            best["features"] = escape_features(features)
            best["improvement_count"] += 1
        else:
            if newcode:
                print("    -> regression {:.1f}%".format(
                    entry["distance_diff"] * 100))

        entry["best"] = best

        add_to_log(log, entry, name="step")
        write_log(log, logpath)

        # Doesn't have to be exactly zero; close enough counts as a match.
        if distance <= 0.001:
            print("found exact match!")
            break

    add_to_log(log, {
        "best_code": best['code'],
        "best_features": escape_features(best['features']),
        "best_distance": best['distance']
    },
               name="end")
    write_log(log, logpath)
Example #19
def test_exists():
    assert fs.exists(__file__)
    assert fs.exists("/")
    assert not fs.exists("/not/a/real/path (I hope!)")
Example #20
#!/usr/bin/env python3.6

import sys
from phd.lib.labm8 import crypto
from phd.lib.labm8 import fs
from progressbar import ProgressBar


if __name__ == "__main__":
  inpath = sys.argv[1]
  outdir = sys.argv[2]
  print(f"reading from {inpath} into {outdir}")

  assert fs.isfile(inpath)
  assert not fs.exists(outdir) or fs.isdir(outdir)
  fs.mkdir(outdir)

  with open(inpath) as infile:
    text = infile.read()

  kernels = text.split("// ==== START SAMPLE ====")
  kernels = [kernel.strip() for kernel in kernels if kernel.strip()]
  print(len(kernels), "kernels")

  sha1s = [crypto.sha1_str(kernel) for kernel in kernels]
  for kernel, sha1 in ProgressBar()(list(zip(kernels, sha1s))):
    with open(f"{outdir}/{sha1}.txt", "w") as outfile:
      print(kernel, file=outfile)