예제 #1
0
파일: benchmark.py 프로젝트: SpringRi/phd
def main():
    """Sample from the model named on the command line and save the results.

    The model is built from the JSON file given as the first command-line
    argument and a sampler is created from a fixed configuration. Corpus
    and platform details are printed, then whatever ``evaluate`` returns is
    formatted as JSON and written to ``./benchmark-<basename of argv[1]>``.
    """
    import sys

    log.init(verbose=True)

    model_path = sys.argv[1]
    m = model.from_json(clgen.load_json_file(model_path))

    # Fixed sampler configuration: kernel argument spec plus batch settings.
    kernel_opts = {
        "args": [
            "__global float*",
            "__global float*",
            "__global float*",
            "const int",
        ],
        "max_length": 5000,
        "temperature": 1,
    }
    batch_opts = {
        "batch_size": 1000,
        "max_batches": 1,
        "static_checker": False,
        "dynamic_checker": False,
    }
    s = sampler.from_json({"kernels": kernel_opts, "sampler": batch_opts})

    print("Corpus size:", m.corpus.size)
    print("Vocab size: ", m.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    # The output path is derived before evaluation, as in the original flow.
    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    results = evaluate(m, s)
    clgen.write_file(outpath, clgen.format_json(results))
예제 #2
0
파일: search.py 프로젝트: ChrisCummins/phd
def main():
    """Command-line entry point: load and train a model, then run ``search``.

    Positional arguments are the model path and the target-code path.
    ``-i/--input`` optionally names a file with starting code, and
    ``-l/--log`` names the search log (default ``search-log.json``).
    """
    from argparse import ArgumentParser

    def read_text(path):
        # Return the full contents of the text file at `path`.
        with open(path) as infile:
            return infile.read()

    cli = ArgumentParser()
    cli.add_argument("model", help="Path to model")
    cli.add_argument("target", help="Path to target code")
    cli.add_argument("-i", "--input", metavar="path", default=None,
                     help="Path to starting code")
    cli.add_argument("-l", "--log", metavar="path", default="search-log.json",
                     help="Path to log file")
    opts = cli.parse_args()

    clgen_log.init(verbose=True)

    # Load the model (tarball or JSON description), then train it.
    if not opts.model.endswith(".tar.bz2"):
        m = clgen.model.from_json(clgen.load_json_file(opts.model))
    else:
        m = model.from_tar(opts.model)
    m.train()

    # The target is read first, then the optional starting code.
    target_code = read_text(opts.target)
    start_code = read_text(opts.input) if opts.input else None

    search(m, target_code, opts.log, start_code=start_code)
예제 #3
0
파일: search.py 프로젝트: 50417/phd
def main():
  """Parse command-line options, prepare a trained model and start ``search``.

  Takes the model path and target-code path as positional arguments, plus
  optional ``-i/--input`` (starting code file) and ``-l/--log`` (log file,
  default ``search-log.json``).
  """
  from argparse import ArgumentParser

  arg_parser = ArgumentParser()
  for flags, options in (
      (("model",), {"help": "Path to model"}),
      (("target",), {"help": "Path to target code"}),
      (("-i", "--input"),
       {"metavar": "path", "default": None, "help": "Path to starting code"}),
      (("-l", "--log"),
       {"metavar": "path", "default": "search-log.json",
        "help": "Path to log file"}),
  ):
    arg_parser.add_argument(*flags, **options)
  parsed = arg_parser.parse_args()

  clgen_log.init(verbose=True)

  # A ".tar.bz2" path is loaded as a tarball; anything else as a JSON file.
  model_location = parsed.model
  is_tarball = model_location.endswith(".tar.bz2")
  if is_tarball:
    m = model.from_tar(model_location)
  else:
    description = clgen.load_json_file(model_location)
    m = clgen.model.from_json(description)
  m.train()

  with open(parsed.target) as target_file:
    target_code = target_file.read()

  # The starting code stays None unless an input path was supplied.
  start_code = None
  if parsed.input:
    with open(parsed.input) as start_file:
      start_code = start_file.read()

  search(m, target_code, parsed.log, start_code=start_code)
예제 #4
0
def main():
    """Run a single sampling benchmark for the model given in ``sys.argv[1]``.

    Builds the model from the JSON file, creates a sampler from a fixed
    configuration, prints corpus and platform information, and writes the
    JSON-formatted result of ``evaluate`` to ``./benchmark-<basename>``.
    """
    import sys

    log.init(verbose=True)

    model_json = clgen.load_json_file(sys.argv[1])
    m = model.from_json(model_json)

    # Three float buffers and one integer, used as the kernel argument spec.
    kernel_args = ["__global float*"] * 3 + ["const int"]
    sampler_config = {
        "kernels": {
            "args": kernel_args,
            "max_length": 5000,
            "temperature": 1,
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        },
    }
    s = sampler.from_json(sampler_config)

    print("Corpus size:", m.corpus.size)
    print("Vocab size: ", m.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    report = clgen.format_json(evaluate(m, s))
    clgen.write_file(outpath, report)
예제 #5
0
def main():
    """Train the model from ``sys.argv[1]`` and evaluate it per prototype.

    For every row returned by ``corpus.most_common_prototypes(c, 20)`` a
    sampler is built from that prototype's argument spec and the result of
    ``evaluate`` is written to ``./inference-p<N>-<basename>``. Rows whose
    output file already exists are skipped.
    """
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    prototypes, _ = corpus.most_common_prototypes(c, 20)
    for number, row in enumerate(prototypes, start=1):
        outpath = "".join(
            ["./inference-p", str(number), "-", fs.basename(sys.argv[1])])
        if fs.exists(outpath):
            continue

        _, prototype = row
        # Drop the last whitespace-separated token of each comma-separated
        # piece (presumably the parameter name), keeping the rest.
        argspec = []
        for piece in prototype.split(','):
            tokens = piece.split()
            argspec.append(' '.join(tokens[:-1]))
        print("argspec", ','.join(map(str, argspec)))

        kernel_opts = {"args": argspec, "max_length": 5000}
        batch_opts = {
            "batch_size": 2000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False,
        }
        s = sampler.from_json({"kernels": kernel_opts, "sampler": batch_opts})

        results = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(results))
예제 #6
0
def main():
    """Train the model from ``sys.argv[1]`` and evaluate it per prototype.

    Iterates over the rows returned by ``corpus.most_common_prototypes(c, 20)``.
    For each one it reports whether the result file
    ``./inference-p<N>-<basename>`` is skipped (already exists) or started,
    then samples with that prototype's argument spec and writes the
    JSON-formatted result of ``evaluate``.
    """
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    prototypes, _ = corpus.most_common_prototypes(c, 20)
    for number, row in enumerate(prototypes, start=1):
        outpath = "".join(
            ["./inference-p", str(number), "-", fs.basename(sys.argv[1])])

        # Guard clause: existing results are reported and left untouched.
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        print("starting result for", outpath)

        _, prototype = row
        # Keep everything but the last whitespace-separated token of each
        # comma-separated piece (presumably the parameter name).
        argspec = [
            ' '.join(piece.split()[:-1]) for piece in prototype.split(',')
        ]
        print("argspec", ','.join(map(str, argspec)))

        sampler_config = {
            "kernels": {
                "args": argspec,
                "max_length": 5000,
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False,
            },
        }
        s = sampler.from_json(sampler_config)

        results = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(results))
예제 #7
0
def search(m, target_code, logpath, start_code=None, max_steps=1000):
    """Mutate code step by step towards `target_code`, logging every step.

    If `logpath` exists, the log is loaded and the search resumes after its
    last step; otherwise a new log is started with an "init" entry. Each
    iteration asks `get_mutation` for a mutation, measures the distance
    between its features and the target's features, and appends a "step"
    entry. The loop stops after `max_steps` steps (counting resumed ones) or
    as soon as a distance <= 0.001 is seen; an "end" entry is then appended.
    The log is written to `logpath` after every entry.

    Args:
        m: Model handed to `get_start_code` and `get_mutation`; its `meta`
            attribute is stored in the "init" entry.
        target_code: Code whose features the search tries to match.
        logpath: Path of the log file to resume from and write to.
        start_code: Optional initial code, used only when the log has no
            steps yet.
        max_steps: Maximum number of mutation steps before stopping.

    Raises:
        AssertionError: If an existing log's "init" entry was recorded for a
            different target code or different target features.
    """
    # resume search
    if fs.exists(logpath):
        log = clgen.load_json_file(logpath)
        print("resuming search of", len(get_steps(log)), "steps")
    else:
        log = []

    steps = get_steps(log)
    resumed = len(steps) > 0

    if resumed:
        last_step = steps[-1]['data']
        # A step-back entry carries no "code" key; it records the code that
        # was stepped back to under "step_back" instead.
        code = last_step.get('code', last_step.get('step_back'))
    elif start_code:
        code = start_code
    else:
        code = get_start_code(m)

    target_features = get_features(target_code)
    features = get_features(code)
    distance = get_distance(target_features, features)

    init_entries = get_entries(log, "init")
    if init_entries:
        init = init_entries[0]
        assert (init['data']['target_code'] == target_code)
        assert (init['data']['target_features'] == escape_features(
            target_features))

        # load history from log
        code_history = get_code_history(log)
    else:
        # create init entry
        add_to_log(log, {
            "start_code": code,
            "start_features": escape_features(features),
            "target_features": escape_features(target_features),
            "target_code": target_code,
            "distance": distance,
            "model": m.meta
        },
                   name="init")
        write_log(log, logpath)
        code_history = [code]

    # keep track of best
    if resumed:
        # Copy, so that updating the running best does not rewrite the
        # already-logged entry it was loaded from.
        best = dict(steps[-1]['data']['best'])
        if "features" not in best:
            # Logs written before any improvement lack the features.
            best["features"] = escape_features(get_features(best["code"]))
    else:
        # "features" is seeded here so the final "end" entry can always be
        # written, even when no step improves on the starting code.
        best = {
            "distance": distance,
            "code": code,
            "features": escape_features(features),
            "improvement_count": 0
        }

    # NOTE(review): `code` is only reassigned on a step back, never to an
    # accepted mutation, so every mutation starts from the same code within
    # one run. Confirm whether `code = newcode` is intended on success.
    for i in range(len(steps), max_steps):
        print("step", i, "of", max_steps)
        newcode, mutate_idx, mutate_seed, attempts = get_mutation(m, code)
        try:
            features = get_features(newcode)
            distance = get_distance(target_features, features)
        except ValueError:
            # Mutation could not be evaluated; treated as a failed step.
            newcode = None

        entry = {"count": i, "attempts": attempts}

        if newcode:
            entry["base_code"] = code
            entry["code"] = newcode
            entry["distance"] = distance
            # NOTE(review): divides by the best distance so far; a best
            # distance of exactly zero is not handled here.
            entry["distance_diff"] = 1 - distance / best["distance"]
            entry["features"] = escape_features(features)
            entry["mutate_idx"] = mutate_idx
            entry["mutate_seed"] = mutate_seed
            code_history.append(code)
        else:
            print("    -> step back")
            # step back
            if len(code_history):
                code = code_history.pop()
            entry["step_back"] = code

        # Only a successfully evaluated mutation can improve on the best;
        # after a failed one `distance` still holds the previous value.
        if newcode and distance < best["distance"]:
            print("    -> improvement {:.1f}%".format(entry["distance_diff"] *
                                                      100))
            best["distance"] = distance
            best["code"] = newcode
            best["features"] = escape_features(features)
            best["improvement_count"] += 1
        elif newcode:
            print("    -> regression {:.1f}%".format(
                entry["distance_diff"] * 100))

        # Store a snapshot: sharing one dict across entries would make every
        # logged "best" show the latest values once the log is rewritten.
        entry["best"] = dict(best)

        add_to_log(log, entry, name="step")
        write_log(log, logpath)

        # doesn't have to be exactly zero but whatever
        if distance <= 0.001:
            print("found exact match!")
            break

    add_to_log(log, {
        "best_code": best['code'],
        "best_features": escape_features(best['features']),
        "best_distance": best['distance']
    },
               name="end")
    write_log(log, logpath)
예제 #8
0
파일: search.py 프로젝트: ChrisCummins/phd
def search(m, target_code, logpath, start_code=None, max_steps=1000):
    """Mutate code step by step towards `target_code`, logging every step.

    If `logpath` exists, the log is loaded and the search resumes after its
    last step; otherwise a new log is started with an "init" entry. Each
    iteration asks `get_mutation` for a mutation, measures the distance
    between its features and the target's features, and appends a "step"
    entry. The loop stops after `max_steps` steps (counting resumed ones) or
    as soon as a distance <= 0.001 is seen; an "end" entry is then appended.
    The log is written to `logpath` after every entry.

    Args:
        m: Model handed to `get_start_code` and `get_mutation`; its `meta`
            attribute is stored in the "init" entry.
        target_code: Code whose features the search tries to match.
        logpath: Path of the log file to resume from and write to.
        start_code: Optional initial code, used only when the log has no
            steps yet.
        max_steps: Maximum number of mutation steps before stopping.

    Raises:
        AssertionError: If an existing log's "init" entry was recorded for a
            different target code or different target features.
    """
    # resume search
    if fs.exists(logpath):
        log = clgen.load_json_file(logpath)
        print("resuming search of", len(get_steps(log)), "steps")
    else:
        log = []

    steps = get_steps(log)
    resumed = len(steps) > 0

    if resumed:
        last_step = steps[-1]['data']
        # A step-back entry carries no "code" key; it records the code that
        # was stepped back to under "step_back" instead.
        code = last_step.get('code', last_step.get('step_back'))
    elif start_code:
        code = start_code
    else:
        code = get_start_code(m)

    target_features = get_features(target_code)
    features = get_features(code)
    distance = get_distance(target_features, features)

    init_entries = get_entries(log, "init")
    if init_entries:
        init = init_entries[0]
        assert(init['data']['target_code'] == target_code)
        assert(init['data']['target_features'] == escape_features(target_features))

        # load history from log
        code_history = get_code_history(log)
    else:
        # create init entry
        add_to_log(log, {
            "start_code": code,
            "start_features": escape_features(features),
            "target_features": escape_features(target_features),
            "target_code": target_code,
            "distance": distance,
            "model": m.meta
        }, name="init")
        write_log(log, logpath)
        code_history = [code]

    # keep track of best
    if resumed:
        # Copy, so that updating the running best does not rewrite the
        # already-logged entry it was loaded from.
        best = dict(steps[-1]['data']['best'])
        if "features" not in best:
            # Logs written before any improvement lack the features.
            best["features"] = escape_features(get_features(best["code"]))
    else:
        # "features" is seeded here so the final "end" entry can always be
        # written, even when no step improves on the starting code.
        best = {
            "distance": distance,
            "code": code,
            "features": escape_features(features),
            "improvement_count": 0
        }

    # NOTE(review): `code` is only reassigned on a step back, never to an
    # accepted mutation, so every mutation starts from the same code within
    # one run. Confirm whether `code = newcode` is intended on success.
    for i in range(len(steps), max_steps):
        print("step", i, "of", max_steps)
        newcode, mutate_idx, mutate_seed, attempts = get_mutation(m, code)
        try:
            features = get_features(newcode)
            distance = get_distance(target_features, features)
        except ValueError:
            # Mutation could not be evaluated; treated as a failed step.
            newcode = None

        entry = {
            "count": i,
            "attempts": attempts
        }

        if newcode:
            entry["base_code"] = code
            entry["code"] = newcode
            entry["distance"] = distance
            # NOTE(review): divides by the best distance so far; a best
            # distance of exactly zero is not handled here.
            entry["distance_diff"] = 1 - distance / best["distance"]
            entry["features"] = escape_features(features)
            entry["mutate_idx"] = mutate_idx
            entry["mutate_seed"] = mutate_seed
            code_history.append(code)
        else:
            print("    -> step back")
            # step back
            if len(code_history):
                code = code_history.pop()
            entry["step_back"] = code

        # Only a successfully evaluated mutation can improve on the best;
        # after a failed one `distance` still holds the previous value.
        if newcode and distance < best["distance"]:
            print("    -> improvement {:.1f}%".format(
                entry["distance_diff"] * 100))
            best["distance"] = distance
            best["code"] = newcode
            best["features"] = escape_features(features)
            best["improvement_count"] += 1
        elif newcode:
            print("    -> regression {:.1f}%".format(
                entry["distance_diff"] * 100))

        # Store a snapshot: sharing one dict across entries would make every
        # logged "best" show the latest values once the log is rewritten.
        entry["best"] = dict(best)

        add_to_log(log, entry, name="step")
        write_log(log, logpath)

        # doesn't have to be exactly zero but whatever
        if distance <= 0.001:
            print("found exact match!")
            break

    add_to_log(log, {
        "best_code": best['code'],
        "best_features": escape_features(best['features']),
        "best_distance": best['distance']
    }, name="end")
    write_log(log, logpath)