Example #1
def main():
    import sys

    log.init(verbose=True)
    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    s = sampler.from_json({
        "kernels": {
            "args": [
                "__global float*",
                "__global float*",
                "__global float*",
                "const int"
            ],
            "max_length": 5000,
            "temperature": 1
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False
        }
    })

    print("Corpus size:", m.corpus.size)
    print("Vocab size: ", m.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    info = evaluate(m, s)
    clgen.write_file(outpath, clgen.format_json(info))
Example #2
def read_file(*components, **kwargs):
    """
  Load a JSON data blob.

  Arguments:
      path (str): Path to file.
      must_exist (bool, otional): If False, return empty dict if file does
          not exist.

  Returns:
      array or dict: JSON data.

  Raises:
      File404: If path does not exist, and must_exist is True.
      InvalidFile: If JSON is malformed.
  """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Re-raise the error rather than returning the exception object.
        raise
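
Note: the must_exist logic above depends on labm8's fs helpers and a loads import. A minimal, stdlib-only sketch of the same pattern (read_json is a hypothetical name; json and os.path stand in for loads and fs):

import json
import os

def read_json(path, must_exist=True):
    # Missing file: either raise or fall back to an empty dict,
    # mirroring the must_exist semantics of read_file above.
    if not os.path.exists(path):
        if must_exist:
            raise FileNotFoundError(path)
        return {}
    with open(path) as infile:
        return json.load(infile)

print(read_json("no-such-file.json", must_exist=False))  # -> {}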
Example #3
def main():
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            continue

        _, prototype = row
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
Example #4
File: benchmark.py Project: SpringRi/phd
def main():
    import sys

    log.init(verbose=True)
    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    s = sampler.from_json({
        "kernels": {
            "args": [
                "__global float*", "__global float*", "__global float*",
                "const int"
            ],
            "max_length":
            5000,
            "temperature":
            1
        },
        "sampler": {
            "batch_size": 1000,
            "max_batches": 1,
            "static_checker": False,
            "dynamic_checker": False
        }
    })

    print("Corpus size:", m.corpus.size)
    print("Vocab size: ", m.corpus.vocab_size)
    print()
    clgen.platform_info()
    print()

    outpath = "./benchmark-" + fs.basename(sys.argv[1])
    info = evaluate(m, s)
    clgen.write_file(outpath, clgen.format_json(info))
Example #5
    def _msg(i, x):
        n = i + 1

        filename = fs.basename(x[0])
        lineno = x[1]
        fnname = x[2]

        loc = "{filename}:{lineno}".format(**vars())
        return "      #{n}  {loc: <18} {fnname}()".format(**vars())
Example #6
    def _user_message_with_stacktrace(exception):
        # get limited stack trace
        _, _, tb = sys.exc_info()
        trace = "\n".join("       {file}:{ln}:{fn}".format(
            file=fs.basename(x[0]), ln=x[1], fn=x[2])
                          for x in traceback.extract_tb(tb, limit=5)[1:])

        log.fatal("""\
{err} ({type})

  stacktrace:
{stack_trace}

Please report bugs at <https://github.com/ChrisCummins/clgen/issues>\
""".format(err=e, type=type(e).__name__, stack_trace=trace))
Example #7
def merge(old_oracle, dbs, path):
    """
    Merge databases into one.

    Arguments:

        old_oracle (str): Path to the existing oracle database.
        dbs (list of Database): Databases to merge.
        path (str): Path to merged database.

    Returns:

        Database: merged database instance.
    """
    print("Merging {n} databases:".format(n=len(dbs) + 1))
    print("   ", old_oracle)
    for db in dbs:
        print("   ", db)
    print()

    # Make a copy of the old oracle database to work from.
    io.info("Coping", old_oracle, "->", fs.basename(path))
    fs.cp(old_oracle, path)

    target = migrate(_db.Database(path=path))

    for db in dbs + [target]:
        try:
            db.num_rows("runtimes")
        except sqlite3.DatabaseError as e:
            io.error("Broken db:", db.path)
            io.fatal(e)

    num_runtimes = [db.num_rows("runtimes") for db in dbs]
    expected_total = target.num_rows("runtimes") + sum(num_runtimes)

    target.merge(dbs)

    total = target.num_rows("runtimes")

    if total != expected_total:
        io.fatal("Expected total", expected_total,
                 "!= actual total", total)

    io.info(("Merged {num_db} databases, {n} rows"
             .format(num_db=len(dbs), n=total)))

    return target
Example #8
def compile_cpp_code(code):
    """
    Compile C++ code to a dynamic library.

    Arguments:
        code (str): C++ code.

    Returns:
        str: Path to binary.
    """
    bincache = cache.FSCache(fs.path("~/.cache/visioncpp"))

    if bincache.get(code):
        logging.info("Found cached binary {}".format(
            fs.basename(bincache[code])))
    else:
        check_for_computecpp()

        counter = {"val": 0}

        def progress(msg):
            text = "{}: {}".format(counter["val"], msg) if msg else ""
            if logging.getLogger().getEffectiveLevel() <= logging.INFO:
                end = "\n"
            else:
                end = ""
            print("\r\033[K {}".format(text), end=end)
            counter["val"] += 1
            sys.stdout.flush()

        tmpdir = mkdtemp(prefix="visioncpp-")
        try:
            progress("compiling device code ...")
            stub = stub_file(code, dir=tmpdir)
            progress("compiling host code ...")
            host = host_compile(code, stub, dir=tmpdir)
            progress("linking executable ...")
            tmpbin = link(host, dir=tmpdir)
            progress("")

            bincache[code] = tmpbin
        except Exception as e:
            rmtree(tmpdir)
            raise e
        rmtree(tmpdir)

    return bincache[code]
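
Note: the progress helper above redraws a single terminal line with ANSI control codes. A self-contained sketch of that trick ("\r" returns the cursor to column 0, "\033[K" erases to the end of the line):

import sys
import time

for i, msg in enumerate(["compiling ...", "linking ...", "done"]):
    print("\r\033[K {}: {}".format(i, msg), end="")
    sys.stdout.flush()  # force the partial line out immediately
    time.sleep(0.1)
print()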
Example #9
File: db.py Project: SpringRi/phd
    def from_bin(path: Path = "gslang",
                 session: session_t = None) -> List['Testbed']:
        import cldrive

        with ReuseSession(session) as s:
            basename = fs.basename(path)
            version = Testbed._get_version(path)
            platform = get_or_add(s,
                                  Platform,
                                  platform=basename,
                                  version=version,
                                  host=cldrive.host_os())
            s.flush()
            return [
                get_or_add(s,
                           Testbed,
                           platform_id=platform.id,
                           optimizations=True),
            ]
Example #10
def file(path: str, **kwargs):
    """
    Drive an OpenCL kernel file.

    Arguments:
        path (str): Path to file.
        **kwargs (dict, optional): Arguments to kernel().
    """
    with open(path) as infile:
        src = infile.read()
        kernels = clutil.get_cl_kernels(src)

        # error if there are no kernels
        if not kernels:
            if kwargs.get("fatal_errors", False):
                raise E_BAD_CODE("no kernels in file '{}'".format(path))
            else:
                print(path, "-", "E_BAD_CODE", '-', sep=',', file=sys.stderr)

        # execute all kernels in file
        for kernelsrc in kernels:
            kernel(kernelsrc, filename=fs.basename(path), **kwargs)
Example #11
File: cli.py Project: yasutakawada/clgen
        def _main() -> None:
            cache = clgen.cachepath()

            log.warning("Not Implemented: refresh corpuses")

            if fs.isdir(cache, "model"):
                cached_modeldirs = fs.ls(fs.path(cache, "model"), abspaths=True)
                for cached_modeldir in cached_modeldirs:
                    cached_model_id = fs.basename(cached_modeldir)
                    cached_meta = jsonutil.read_file(fs.path(cached_modeldir, "META"))

                    model = clgen.Model.from_json(cached_meta)

                    if cached_model_id != model.hash:
                        log.info(cached_model_id, '->', model.hash)

                        if fs.isdir(model.cache.path):
                            log.fatal("cache conflict", file=sys.stderr)

                        fs.mv(cached_modeldir, model.cache.path)

            log.warning("Not Implemented: refresh samplers")
Example #12
def main():
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        else:
            print("starting result for", outpath)

        _, prototype = row
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
Example #13
File: cli.py Project: yasutakawada/clgen
    def features_dir(csv_path):
        return fs.basename(fs.dirname(csv_path))
Example #14
    def test_basename(self):
        self._test("foo", fs.basename("foo"))
        self._test("foo", fs.basename(fs.abspath("foo")))
Example #15
File: fs_test.py Project: 50417/DeepFuzzSL
def test_basename():
    assert "foo" == fs.basename("foo")
    assert "foo" == fs.basename(fs.abspath("foo"))
Example #16
File: fs_test.py Project: 50417/DeepFuzzSL
def test_must_exist():
    with tempfile.NamedTemporaryFile(prefix='labm8_') as f:
        assert fs.must_exist(f.name) == f.name
        assert fs.must_exist(fs.dirname(f.name), fs.basename(f.name)) == f.name
    with pytest.raises(fs.File404):
        fs.must_exist("/not/a/real/path")
Example #17
                        type=str,
                        default="cc1",
                        help="MySQL database hostname")
    args = parser.parse_args()

    db.init(args.hostname)

    with Session(commit=False) as s:
        # Export results
        #
        print("Exporting CLgen results ...")
        fs.mkdir("export/clgen/result")

        # Pick up where we left off
        done = set([
            int(fs.basename(path))
            for path in Path("export/clgen/result").iterdir()
        ])
        print(len(done), "done")
        ids = set([x[0] for x in s.query(CLgenResult.id).all()])
        print(len(ids), "in total")
        todo = ids - done
        print(len(todo), "todo")

        for result_id in ProgressBar()(todo):
            result = s.query(CLgenResult).filter(
                CLgenResult.id == result_id).scalar()

            with open(f"export/clgen/result/{result.id}", "w") as outfile:
                print(json.dumps({
                    "id":
Example #18
File: _fetch.py Project: DhashS/clgen
def fetch_repos(db_path: Path, indir: Path, lang: clgen.Language) -> None:
    db = dbutil.connect(db_path)

    if not dbutil.is_github(db):
        raise clgen.UserError("not a GitHub database")

    c = db.cursor()

    for directory in fs.ls(indir, abspaths=True):
        # hacky hardcoded interpretation of `git remote -v`
        gitdir = fs.path(directory, ".git")
        output = subprocess.check_output(
            ["git", "--git-dir", gitdir, "remote", "-v"],
            universal_newlines=True)
        url = output.split("\n")[0].split("\t")[1].split(" ")[0]
        name = fs.basename(directory)

        output = subprocess.check_output(
            f"git --git-dir {gitdir} rev-list --format=format:'%ai' " +
            f"--max-count=1 $(git --git-dir {gitdir} rev-parse HEAD) | tail -n1",
            shell=True,
            universal_newlines=True)
        try:
            updated_at = dateutil.parser.parse(output)
        except ValueError:
            log.error(f"failed to process {name} {url}")
            continue

        c.execute("SELECT updated_at FROM Repositories WHERE url=?", (url, ))
        cached_updated_at = c.fetchone()

        # Do nothing unless updated timestamps don't match
        # if cached_updated_at and cached_updated_at[0] >= updated_at:
        #     log.verbose(name, "already in database")
        #     continue

        c.execute("DELETE FROM Repositories WHERE url=?", (url, ))
        c.execute("INSERT INTO Repositories VALUES(?,?,?,?,?,?,?,?,?)",
                  (url, "<unknown>", name, 0, 0, 0, 0, updated_at, updated_at))

        name_str = " -o ".join(
            [f"-name '*{ext}'" for ext in clgen.file_extensions(lang)])
        output = subprocess.check_output(
            f"find {directory} -type f {name_str} | grep -v '.git/' || true",
            shell=True,
            universal_newlines=True)
        files = [x.strip() for x in output.split("\n") if x.strip()]

        # nothing to import
        if not files:
            # log.verbose("no files in", name)
            continue

        log.verbose("processing", len(files), "files in", name)
        for path in files:
            relpath = path[len(directory) + 1:]
            try:
                contents = inline_fs_headers(path, [], lang=lang)
                sha = crypto.sha1_str(contents)
                c.execute('INSERT OR IGNORE INTO ContentFiles VALUES(?,?)',
                          (sha, contents))
                c.execute(
                    "INSERT OR IGNORE INTO ContentMeta VALUES(?,?,?,?,?)",
                    (sha, relpath, url, sha, len(contents)))
            except UnicodeDecodeError:
                log.warning("non UTF-8 file", path)

        db.commit()
        c = db.cursor()
Example #19
File: preamble.py Project: SpringRi/phd
def load_data_desc(platform, source="B",
                   max_seq_len=1000, atomizer=CharacterAtomizer,
                   quiet=False):
  """ load experimental results """

  def get_benchmarks(platform):
    B = pd.read_csv(
        fs.path("runtimes/{platform}-benchmarks.csv".format(**vars())))
    B["source"] = [escape_suite_name(x) for x in B["benchmark"]]
    B["synthetic"] = [0] * len(B)
    return B

  def get_npb_benchmarks(platform):
    B = get_benchmarks(platform)
    msk = B["source"] == "NPB"
    return B[msk]

  def get_synthetics(platform):
    S = pd.read_csv(fs.path("runtimes/{platform}-clgen.csv".format(**vars())))
    S["source"] = ["CLgen"] * len(S)
    S["synthetic"] = [1] * len(S)
    return S

  if source == "B":
    dataframe = get_benchmarks(platform)
  elif source == "S":
    dataframe = get_synthetics(platform)
  elif source == "BS":
    dataframe = pd.concat((get_benchmarks(platform), get_synthetics(platform)))
  elif source == "N":
    dataframe = get_npb_benchmarks(platform)
  elif source == "NS":
    dataframe = pd.concat(
        (get_npb_benchmarks(platform), get_synthetics(platform)))
  else:
    raise Exception

  dataframe["oracle_enc"] = [1 if x == "GPU" else 0 for x in
                             dataframe["oracle"].values]
  dataframe["benchmark_name"] = [escape_benchmark_name(b) for b in
                                 dataframe["benchmark"].values]

  # load source code:
  source_dir = fs.path("kernels")
  srcs, benchmark_names = [], []
  for row in dataframe["benchmark"].values:
    inpath = fs.path(source_dir, row + ".cl")
    with open(inpath) as infile:
      src = infile.read()
    if not src.startswith("__kernel void A"):
      print(fs.basename(inpath))
      raise Exception(src)
    srcs.append(src)
  dataframe["src"] = srcs
  dataframe["src_len"] = [len(s) for s in srcs]

  if not quiet:
    print("num instances {} ({} synthetic, {} benchmarks)".format(
        len(dataframe),
        sum(dataframe["synthetic"].values),
        len(dataframe) - sum(dataframe["synthetic"].values)))
    print("unique kernels", len(set(srcs)))

  # encode and pad sequences:
  atomizer = atomizer.from_text(''.join(dataframe["src"].values))

  seqs = [atomizer.atomize(seq) for seq in dataframe["src"].values]
  seq_length = min(max(len(s) for s in seqs), max_seq_len)
  pad_val = atomizer.vocab_size + 1
  dataframe["seq_len"] = [len(s) for s in seqs]
  dataframe["seq"] = list(pad_sequences(seqs, maxlen=seq_length, value=pad_val))

  if not quiet:
    print("vocab size", atomizer.vocab_size + 1)
    print("pad val", pad_val)
    print("padded seq length", seq_length)

  return {
    "dataframe": dataframe,
    "seq_length": seq_length,
    "atomizer": atomizer
  }
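
Note: pad_sequences above is the Keras utility, whose defaults pad and truncate at the front of each sequence. A hedged stdlib sketch of that behavior (pad_pre is a hypothetical name):

def pad_pre(seq, maxlen, value):
    seq = seq[-maxlen:]  # truncate from the front, keeping the tail
    return [value] * (maxlen - len(seq)) + seq  # left-pad with value

assert pad_pre([1, 2, 3], 5, 0) == [0, 0, 1, 2, 3]
assert pad_pre([1, 2, 3, 4, 5, 6], 5, 0) == [2, 3, 4, 5, 6]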
Example #20
File: _fetch.py Project: DhashS/clgen
def inline_fs_headers(path: Path,
                      stack: List[str],
                      lang: clgen.Language = clgen.Language.OPENCL,
                      topdir: Path = None) -> str:
    """
    Recursively inline headers in file.

    Parameters
    ----------
    path : str
        File.
    stack : List[str]
        File stack.
    lang : clgen.Language
        Programming language of the file (default: OpenCL).
    topdir : Path
        The top level directory to stop searching for includes in.

    Returns
    -------
    str
        Inlined file.
    """
    stack.append(path)

    if topdir is None:
        topdir = fs.dirname(path)
    # shell escaped top directory
    escp_topdir = topdir.replace('"', '\\"')

    include_re = clgen.include_regexp(lang)

    with open(path, encoding="utf-8") as infile:
        src = infile.read()

    outlines = []
    for line in src.split('\n'):
        match = re.match(include_re, line)
        if match:
            # We have an import to inline!
            include = match.group("path")

            # Search for files with that name in the repository
            include_basename = fs.basename(include)
            esc_basename = include_basename.replace('"', '\\"')
            candidates = [
                x for x in subprocess.check_output(
                    f'find "{escp_topdir}" -type f -name {esc_basename}',
                    shell=True,
                    universal_newlines=True).split('\n')
                if x
            ]

            # Select which file to inline:
            if len(candidates) == 1:
                # If there's exactly one match, then we're done:
                file_to_inline = candidates[0]
            elif len(candidates) > 1:
                # We have multiple candidates to inline, so we'll compare the
                # full paths (relative to the top directory) to select the one
                # whose name is the closest match:
                rel_matches = [c[len(topdir) + 1:] for c in candidates]
                distances = [
                    editdistance.eval(include, m) for m in rel_matches
                ]
                min_distance = min(distances)
                file_to_inline = candidates[distances.index(min_distance)]
                log.debug(
                    f"Inferred include '{file_to_inline}' from '{line}' with distance {min_distance}"
                )
            else:
                # We didn't find anything suitable:
                file_to_inline = None

            # Process the inline file:
            if file_to_inline in stack:
                # We've already inlined this file, so ignore it:
                outlines.append(
                    clgen.format_as_comment(
                        lang, f'[FETCH] ignored_include({line})'))
            elif file_to_inline:
                # Inline the file by recursively expanding its contents:
                outlines.append(
                    clgen.format_as_comment(lang,
                                            f'[FETCH] begin_include({line})'))
                inline_src = inline_fs_headers(file_to_inline, stack)
                outlines.append(inline_src)
                outlines.append(
                    clgen.format_as_comment(lang,
                                            f'[FETCH] end_include({line})'))
            else:
                # We didn't find anything suitable, so keep the original
                # include:
                outlines.append(
                    clgen.format_as_comment(lang,
                                            f'[FETCH] not_found({line})'))
                outlines.append(line)
        else:
            outlines.append(line)

    return '\n'.join(outlines)
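
Note: when several repository files match an include's basename, the code above keeps the candidate whose repo-relative path is closest to the include string by edit distance. A minimal reproduction of that selection step (the paths are made up; editdistance is the same third-party package used above):

import editdistance

include = "util/common.h"
rel_matches = ["src/util/common.h", "third_party/common.h"]
distances = [editdistance.eval(include, m) for m in rel_matches]
print(rel_matches[distances.index(min(distances))])  # -> src/util/common.h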