Esempio n. 1
0
def clang_cl_args(target=CLANG_CL_TARGETS[0],
                  error_limit=0):
    """
    Build the Clang command-line arguments used to compile OpenCL.

    :param target: Value passed after Clang's ``-target`` flag.
    :param error_limit: Value formatted into ``-ferror-limit=``.
    :return: List of argument strings.
    """
    # Clang warnings that are switched off via -Wno-<name>.
    suppressed = (
        'ignored-pragmas',
        'implicit-function-declaration',
        'incompatible-library-redeclaration',
        'macro-redefined',
    )

    include_dir = fs.path(cfg.libclc(), 'generic', 'include')
    shim_header = smith.package_path(
        fs.path('share', 'include', 'opencl-shim.h'))

    args = ['-I' + include_dir]
    args += ['-include', shim_header]
    args += ['-target', target]
    args.append('-ferror-limit={}'.format(error_limit))
    args.append('-xcl')
    for warning in suppressed:
        args.append('-Wno-{}'.format(warning))
    return args
Esempio n. 2
0
def get_all_sampler_datasets():
    """
    Collect the "kernels.db" files of all sampler directories.

    Every entry of ~/.cache/clgen is checked for a "sampler"
    sub-directory; each entry inside such a directory contributes its
    "kernels.db" path if that file exists.

    :return: List of paths to existing kernels.db files.
    """
    # Phase 1: gather every entry found inside a "sampler" directory.
    candidates = []
    for cache_entry in fs.ls(fs.path("~/.cache/clgen"), abspaths=True):
        sampler_root = fs.path(cache_entry, "sampler")
        if not fs.isdir(sampler_root):
            continue
        candidates.extend(fs.ls(sampler_root, abspaths=True))

    # Phase 2: keep only the databases that are actually present.
    db_paths = (fs.path(entry, "kernels.db") for entry in candidates)
    return [db for db in db_paths if fs.isfile(db)]
Esempio n. 3
0
 def _init_error(err: Exception) -> None:
     """
     Delete corpus files after a failed creation, then re-raise.

     Uses `self` from the enclosing scope to locate the cache
     directories.

     Arguments:
         err (Exception): The error that caused the failure. It is
             raised again once the files have been removed.
     """
     log.error("corpus creation failed. Deleting corpus files")
     content_dir = self.contentcache.path
     leftovers = [fs.path(content_dir, "kernels.db")]
     for filename in ("corpus.txt", "tensor.npy", "atomizer.pkl"):
         leftovers.append(fs.path(self.cache.path, filename))

     for leftover in leftovers:
         if not fs.exists(leftover):
             continue
         log.info("removing", leftover)
         fs.rm(leftover)
     raise err
Esempio n. 4
0
def test_PullFromRemoteToLocal(
        test_host: machine_spec_pb2.Host,
        test_mirrored_directory: machine_spec_pb2.MirroredDirectory):
    """A file written at the remote path is present locally after a pull."""
    mirror = LocalMirroredDirectory(test_host, test_mirrored_directory)
    remote_file_path = fs.path(mirror.remote_path, 'a')
    with open(remote_file_path, 'w') as remote_file:
        remote_file.write('Hello, world!')

    mirror.PullFromRemoteToLocal()

    # The local directory and the pulled file must both exist.
    assert pathlib.Path(mirror.local_path).is_dir()
    assert pathlib.Path(mirror.local_path).joinpath('a').is_file()
    with open(fs.path(mirror.local_path, 'a')) as local_file:
        contents = local_file.read()
        assert contents == 'Hello, world!'
Esempio n. 5
0
def models() -> Iterator[Model]:
    """
    Iterate over all cached models.

    Each entry of the "model" directory below clgen.cachepath() is
    expected to hold a "META" JSON file, which is read and turned into a
    Model via Model.from_json(). Nothing is yielded when the "model"
    directory does not exist.

    Returns
    -------
    Iterator[Model]
        An iterable over all cached models.
    """
    if not fs.isdir(clgen.cachepath(), "model"):
        return

    model_root = fs.path(clgen.cachepath(), "model")
    for model_dir in fs.ls(model_root, abspaths=True):
        meta_path = fs.path(model_dir, "META")
        yield Model.from_json(jsonutil.read_file(meta_path))
Esempio n. 6
0
def data_path(*components, **kwargs) -> str:
    """
    Return the path to a unittest data file.

    The path is built from the directory of this module, the "data"
    sub-directory and the given components.

    Parameters
    ----------
    *components : str
        Relative path.
    **kwargs
        If 'exists' is True (the default), require that the file exists.

    Returns
    -------
    str
        Path to the data file.

    Raises
    ------
    Data404
        If the path doesn't exist and 'exists' is True.
    """
    must_exist = kwargs.get("exists", True)
    relative = fs.path(*components)
    module_dir = os.path.dirname(__file__)

    abspath = os.path.join(module_dir, "data", relative)
    if not must_exist or os.path.exists(abspath):
        return abspath
    raise Data404(abspath)
Esempio n. 7
0
def main():
    """
    Print a per-device job summary and enumerate every job in joblist.db.

    A table of job counts grouped by device is printed first. Then one
    log file per device is opened under jobs/ and passed, together with
    the database and the columns of each row of the jobs table, to
    enum_job(). Finishes by calling lab.exit().
    """
    db = _db.Database(fs.path("joblist.db"))
    data = [
        row for row in db.execute("SELECT device,Count(*) AS count\n"
                                  "FROM jobs\n"
                                  "GROUP BY device\n"
                                  "ORDER BY count")
    ]
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = [row for row in db.execute("SELECT * FROM jobs")]

    fs.mkdir("jobs")
    # One log per device. Each file is opened exactly once: a repeated key
    # in a dict literal would open (and truncate) the same file twice and
    # leak the first handle.
    devices = ("monza", "whz5", "cec", "florence")
    logs = {
        device: open("jobs/{}.txt".format(device), "w")
        for device in devices
    }

    for job in jobs:
        enum_job(logs, db, *job)

    lab.exit()
Esempio n. 8
0
 def __init__(self, path):
     """
     Create a new directory lock.

     Arguments:
         path (str): Path to lock file. It is passed through fs.path()
             before being stored on the instance.
     """
     lock_path = fs.path(path)
     self.path = lock_path
Esempio n. 9
0
def data_path(*components, exists=True) -> str:
    """
    Return the path to a unittest data file.

    The path is built from the directory of this module, the "data"
    sub-directory and the given components.

    Parameters
    ----------
    *components : str
        Relative path.
    exists : bool, optional
        If True, require that file exists.

    Returns
    -------
    str
        Path to the data file.

    Raises
    ------
    Data404
        If path doesn't exist and 'exists' is True.
    """
    relative = fs.path(*components)
    module_dir = os.path.dirname(__file__)

    abspath = os.path.join(module_dir, "data", relative)
    if not exists or os.path.exists(abspath):
        return abspath
    raise Data404(abspath)
Esempio n. 10
0
def get_all_sampler_datasets(all_clgen_versions: bool=True) -> list:
    """
    Collect the "kernels.db" files of sampler directories.

    Arguments:
        all_clgen_versions (bool, optional): If True, look in every entry
            of ~/.cache/clgen. Otherwise look only in the entry named
            after clgen.version().

    Returns:
        list: Paths to existing kernels.db files.
    """
    if all_clgen_versions:
        candidates = fs.ls(fs.path("~/.cache/clgen"), abspaths=True)
    else:
        candidates = [fs.path("~/.cache/clgen", clgen.version())]

    # Only version directories that contain a "sampler" directory count.
    with_sampler = []
    for candidate in candidates:
        if fs.isdir(candidate, "sampler"):
            with_sampler.append(candidate)

    found = []
    for version_dir in with_sampler:
        sampler_root = fs.path(version_dir, "sampler")
        for sampler_dir in fs.ls(sampler_root, abspaths=True):
            db_path = fs.path(sampler_dir, "kernels.db")
            if not fs.isfile(db_path):
                continue
            found.append(db_path)

    return found
Esempio n. 11
0
def package_data(*path) -> bytes:
    """
    Read package data file.

    Parameters
    ----------
    path : str
        The relative path to the data file, e.g. 'share/foo.txt'.

    Returns
    -------
    bytes
        File contents.

    Raises
    ------
    InternalError
        In case of IO error.
    """
    # Called only for its side effect: it is expected to raise when the
    # file doesn't exist.
    package_path(*path)

    try:
        contents = resource_string(__name__, fs.path(*path))
    except Exception:
        message = "failed to read package data '{}'".format(path)
        raise InternalError(message)
    else:
        return contents
Esempio n. 12
0
def main():
    """
    Print a per-device job summary and enumerate every job in joblist.db.

    A table of job counts grouped by device is printed first. Then one
    log file per device is opened under jobs/ and passed, together with
    the database and the columns of each row of the jobs table, to
    enum_job(). Finishes by calling lab.exit().
    """
    db = _db.Database(fs.path("joblist.db"))
    data = [row for row in
            db.execute("SELECT device,Count(*) AS count\n"
                       "FROM jobs\n"
                       "GROUP BY device\n"
                       "ORDER BY count")]
    io.info("Job list:")
    print(fmt.table(data, columns=("Device", "Jobs")))
    print()

    jobs = [row for row in db.execute("SELECT * FROM jobs")]

    fs.mkdir("jobs")
    # One log per device. The original literal listed "monza" twice, which
    # opened (and truncated) jobs/monza.txt twice and leaked the first
    # file handle; every device is now opened exactly once.
    devices = ("monza", "whz5", "cec", "florence")
    logs = {
        device: open("jobs/{}.txt".format(device), "w")
        for device in devices
    }

    for job in jobs:
        enum_job(logs, db, *job)

    lab.exit()
Esempio n. 13
0
def graph_ocl_stars(db_path: str) -> None:
    """
    Plot distribution of stargazers per file.

    The histogram is saved as "ocl_stars.png" inside IMG_DIR. Only files
    with fewer than 50 stars are plotted.

    Arguments:
        db_path (str): Path of the database to read from.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    # The file name must be a relative component: a leading "/" makes it
    # absolute, and joining an absolute component discards IMG_DIR.
    out_path = fs.path(IMG_DIR, 'ocl_stars.png')
    print('graph', out_path, '...')
    db = dbutil.connect(db_path)
    c = db.cursor()

    c.execute('SELECT stars FROM ContentMeta LEFT JOIN Repositories '
              'ON ContentMeta.repo_url=Repositories.url')
    stars = [x[0] for x in c.fetchall()]

    # Filter range. The LEFT JOIN yields NULL (None) stars for files
    # without a matching repository; those cannot be compared to an int.
    data = [x for x in stars if x is not None and x < 50]

    sns.distplot(data, bins=20, kde=False)
    plt.xlabel('GitHub Stargazer count')
    plt.ylabel('Number of files')
    plt.title('Stargazers per file')
    plt.savefig(out_path)
Esempio n. 14
0
    def __init__(self, path, tables=None, enable_traces=True):
        """
        Open a database connection, creating tables as requested.

        Arguments:
            path (str): The path to the database file.
            tables (dictionary of {str: tuple of str}, optional): A
              dictionary of {name: schema} pairs, where a schema is a list
              of tuple pairs, of the form: (name, type). Defaults to no
              tables.
            enable_traces (bool, optional): Enable traces for user
              defined functions and aggregates.
        """
        self.path = fs.path(path)

        # Create directory if needed.
        parent_dir = fs.dirname(path)
        if parent_dir:
            fs.mkdir(parent_dir)

        self.connection = sql.connect(self.path)

        # None stands for "no tables"; this avoids a mutable default
        # argument shared between calls.
        for name, schema in six.iteritems(tables or {}):
            self.create_table(name, schema)

        io.debug("Opened connection to '{0}'".format(self.path))

        # Register exit handler
        atexit.register(self.close)

        # Enable traces for user defined functions and aggregates. See:
        #
        # https://docs.python.org/2/library/sqlite3.html#sqlite3.enable_callback_tracebacks
        if enable_traces:
            sql.enable_callback_tracebacks(True)
Esempio n. 15
0
    def from_json(corpus_json: dict):
        """
        Instantiate Corpus from JSON.

        The "path" and "id" keys are popped from the given dict; all
        remaining items are forwarded to the Corpus constructor as
        keyword arguments. When a path is given, the corpus ID is the
        sha1 dirhash of that directory.

        Arguments:
            corpus_json (dict): Specification.

        Returns:
            Corpus: Instantiated corpus.

        Raises:
            clgen.UserError: If the path is not a directory, the ID is
                not found in the cache, or neither is provided.
        """
        path = corpus_json.pop("path", None)
        uid = corpus_json.pop("id", None)

        if not path and not uid:
            raise clgen.UserError("No corpus path or ID provided")

        if path:
            path = unpack_directory_if_needed(fs.abspath(path))
            if not fs.isdir(path):
                message = "Corpus path '{}' is not a directory".format(path)
                raise clgen.UserError(message)
            uid = dirhash(path, 'sha1')
        else:
            cached_corpus = fs.path(cache.ROOT, "corpus", uid)
            if not fs.isdir(cached_corpus):
                raise clgen.UserError("Corpus {} not found".format(uid))

        return Corpus(uid, path=path, **corpus_json)
Esempio n. 16
0
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file.
        must_exist (bool, optional): If False, return empty dict if file
            cannot be read. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        ValueError: If JSON is malformed.
        IOError: If the file cannot be read and must_exist is True.
        Any error raised by fs.must_exist() when must_exist is True
        (presumably File404 for a missing path - confirm against fs).
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}"
            .format(path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure. Returning the exception object would
        # hand callers an IOError where they expect JSON data.
        raise
Esempio n. 17
0
def main():
  """
  Write CLSmith programs of one classification with the header inlined.

  For every distinct program ID whose result has the classification given
  on the command line, the program source is split at its
  '#include "CLSmith.h"' line and written to <outdir>/<id>.cl with the
  contents of the clsmith.h data file in place of the include. Output
  files that already exist are left alone.
  """
  parser = ArgumentParser(description=__description__)
  for positional in ("classification", "outdir"):
    parser.add_argument(positional)
  args = parser.parse_args()

  db.init("cc1")
  session = db.make_session()

  id_query = session.query(sql.distinct(CLSmithResult.program_id)) \
    .filter(CLSmithResult.classification == args.classification)
  program_ids = [row[0] for row in id_query.all()]

  header = fs.read_file(dsmith.data_path("include", "clsmith.h"))

  fs.mkdir(args.outdir)

  for program_id in ProgressBar()(program_ids):
    outpath = fs.path(args.outdir, program_id + ".cl")
    if fs.exists(outpath):
      continue

    program = session.query(CLSmithProgram) \
      .filter(CLSmithProgram.id == program_id).one()

    # Exactly one include line is expected; the unpacking fails otherwise.
    pre, post = program.src.split('#include "CLSmith.h"')

    with open(outpath, "w") as outfile:
      print(pre + header + post, file=outfile)
Esempio n. 18
0
def read_file(*components, **kwargs):
    """
    Load a JSON data blob.

    Arguments:
        *components (str): Path components of the file.
        must_exist (bool, optional): If False, return empty dict if file
            cannot be read. Defaults to True.

    Returns:
        array or dict: JSON data.

    Raises:
        ValueError: If JSON is malformed.
        IOError: If the file cannot be read and must_exist is True.
        Any error raised by fs.must_exist() when must_exist is True
        (presumably File404 for a missing path - confirm against fs).
    """
    must_exist = kwargs.get("must_exist", True)

    if must_exist:
        path = fs.must_exist(*components)
    else:
        path = fs.path(*components)

    try:
        with open(path) as infile:
            return loads(infile.read())
    except ValueError as e:
        raise ValueError(
            "malformed JSON file '{path}'. Message from parser: {err}".format(
                path=fs.basename(path), err=str(e)))
    except IOError:
        if not must_exist:
            return {}
        # Propagate the failure. Returning the exception object would
        # hand callers an IOError where they expect JSON data.
        raise
Esempio n. 19
0
    def _main(infiles: List[TextIO], dir_mode: bool, summarise: bool,
              fatal_errors: bool, use_shum: bool, quiet: bool,
              no_header: bool) -> None:
        """
        Summarise feature files, or extract features from input files.

        Arguments:
            infiles (List[TextIO]): Input files; only their names are used.
            dir_mode (bool): Treat the inputs as directories and process
                every entry found by a recursive listing.
            summarise (bool): Print a CSV summary of the inputs and return.
            fatal_errors (bool): Forwarded to features.files().
            use_shum (bool): Forwarded to features.files() as `use_shim`.
                NOTE(review): the parameter name looks like a typo of
                `use_shim`; it is kept so existing callers keep working.
            quiet (bool): Forwarded to features.files().
            no_header (bool): If True, features.files() is called with
                header=False.
        """
        from clgen import features

        input_paths = [infile.name for infile in infiles]

        def features_dir(csv_path):
            # Name of the directory that contains the given file.
            return fs.basename(fs.dirname(csv_path))

        if summarise:
            stats = [features.summarize(f) for f in input_paths]

            print('dataset', *list(stats[0].keys()), sep=',')
            for path, stat in zip(input_paths, stats):
                print(features_dir(path), *list(stat.values()), sep=',')
            return

        if dir_mode:
            trees = [fs.ls(d, abspaths=True, recursive=True) for d in input_paths]
            paths = [item for sublist in trees for item in sublist]
        else:
            paths = [fs.path(f) for f in input_paths]

        # Pass the parameter itself: the body previously read `use_shim`,
        # a name that is not defined in this function.
        features.files(paths, fatal_errors=fatal_errors, quiet=quiet,
                       use_shim=use_shum, header=not no_header)
Esempio n. 20
0
def clang_cl_args(target: str = CLANG_CL_TARGETS[0],
                  use_shim: bool = True,
                  error_limit: int = 0) -> list:
    """
    Get the Clang args to compile OpenCL.

    Arguments:
        target (str): LLVM target.
        use_shim (bool, optional): Inject shim header. When set, the
            '-include' pair is appended after all other arguments.
        error_limit (int, optional): Limit number of compiler errors.

    Returns:
        str[]: Array of args.
    """
    # clang warnings to disable
    suppressed = (
        'ignored-pragmas',
        'implicit-function-declaration',
        'incompatible-library-redeclaration',
        'macro-redefined',
    )

    args = ['-I' + fs.path(native.LIBCLC)]
    args.extend(['-target', target])
    args.append('-ferror-limit={}'.format(error_limit))
    args.append('-xcl')
    for warning in suppressed:
        args.append('-Wno-{}'.format(warning))

    if not use_shim:
        return args

    args.extend(['-include', native.SHIMFILE])
    return args
Esempio n. 21
0
def graph_bc_lc(db_path: str) -> None:
    """
    Plot distribution of bytecode line counts.

    The histogram is saved as "bc_lcs.png" inside IMG_DIR. Only entries
    with fewer than 500 lines are plotted.

    Arguments:
        db_path (str): Path of the database to read from.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(color_codes=True)

    out_path = fs.path(IMG_DIR, 'bc_lcs.png')
    print('graph', out_path, '...')
    cursor = dbutil.connect(db_path).cursor()

    cursor.execute("SELECT contents FROM Bytecodes")
    rows = cursor.fetchall()

    # Line count of every decoded bytecode.
    line_counts = []
    for row in rows:
        line_counts.append(len(decode(row[0]).split('\n')))

    # Filter range
    in_range = [count for count in line_counts if count < 500]

    sns.distplot(in_range, bins=20, kde=False)
    plt.xlabel('Line count')
    plt.ylabel('Number of Bytecode files')
    plt.title('Distribution of Bytecode lengths')
    plt.savefig(out_path)
Esempio n. 22
0
    def _create_kernels_db(self, path: str, encoding: str = "default") -> None:
        """
        Create and cache kernels.db.

        Arguments:
            path (str): Directory of the corpus; listed recursively.
            encoding (str, optional): Passed to encode().
        """
        log.debug("creating database")

        # Build the database at a temporary path, then register it in the
        # content cache under the "kernels.db" key.
        staging_db = fs.path(self.contentcache.path, "kernels.db.tmp")
        dbutil.create_db(staging_db)
        self.contentcache["kernels.db"] = staging_db

        # Every entry below the corpus path for which fs.isfile() holds.
        corpus_files = []
        for entry in fs.ls(path, abspaths=True, recursive=True):
            if fs.isfile(entry):
                corpus_files.append(entry)

        # Pipeline: import -> preprocess -> encode -> print stats. The
        # database is looked up in the cache again for every step.
        fetch.fetch_fs(self.contentcache["kernels.db"], corpus_files)
        preprocess.preprocess_db(self.contentcache["kernels.db"])
        encode(self.contentcache["kernels.db"], encoding)
        explore.explore(self.contentcache["kernels.db"])
Esempio n. 23
0
    def to_dist(self, distpath: str, author: str = None) -> str:
        """
        Create a dist file.

        The archive is a bzip2-compressed tarball holding the metadata as
        "meta.json" and every path listed in meta["contents"] below
        "contents/". On failure the partial archive is removed and the
        original error is re-raised.

        Arguments:
            distpath (str): Path to dist file (".tar.bz2" is appended).
            author (str, optional): Author name. If given, it is stored
                in the metadata under "author".

        Returns:
            str: Path to generated distfile.

        Raises:
            DistError: If the output file already exists.
        """
        outpath = fs.abspath(distpath) + ".tar.bz2"
        if fs.exists(outpath):
            raise DistError("file {} exists".format(outpath))

        meta = self.meta
        if author is not None:
            meta["author"] = author
        log.debug(clgen.format_json(meta))

        # Bound up front so the cleanup below never hits an unbound name
        # when opening the archive or creating the temp file fails.
        tar = None
        metapath = None
        try:
            tar = tarfile.open(outpath, 'w:bz2')

            # write meta
            # NOTE(review): if this is tempfile.mktemp, it is documented as
            # race-prone; consider mkstemp - confirm the import.
            metapath = mktemp(prefix="clgen-", suffix=".json")
            clgen.write_file(metapath, clgen.format_json(meta))
            log.debug("metafile:", metapath)

            # create tarball
            tar.add(metapath, arcname="meta.json")

            # pack contents:
            for path in meta["contents"]:
                abspath = fs.path(cache.ROOT, path)
                log.verbose("packing", abspath)
                tar.add(abspath, arcname=fs.path("contents", path))

            tar.close()
        except Exception:
            if tar is not None:
                tar.close()
            fs.rm(outpath)
            # bare raise keeps the original traceback
            raise
        finally:
            # tidy up the temporary metadata file on every path
            if metapath is not None:
                fs.rm(metapath)

        return outpath
Esempio n. 24
0
    def __init__(self, name: str):
        """
        Create filesystem cache.

        Arguments:
            name (str): Cache name; the cache directory is ROOT joined
                with this name and is created via fs.mkdir().
        """
        cache_dir = fs.path(ROOT, name)
        self.path = cache_dir
        self.name = name

        fs.mkdir(self.path)
Esempio n. 25
0
def extra_args(use_shim=False):
    """
    Return additional compiler arguments.

    Arguments:
        use_shim (bool, optional): If true, define SMITH_FEATURES and
            include the packaged opencl-shim.h header.

    Returns:
        list: Argument strings; empty when the shim is not used.
    """
    if not use_shim:
        return []

    shim_header = smith.package_path(
        fs.path('share', 'include', 'opencl-shim.h'))
    return ["-DSMITH_FEATURES", "-include", shim_header]
Esempio n. 26
0
def get_instcount(opencl_path):
    """
    Return the parsed instruction counts for an OpenCL file.

    Runs the command built by clang_cmd() with "/tmp/temp.bc" as its
    output, then parses the output of the command built by
    instcount_cmd() for that file with parse_instcount().
    """
    io.debug("Reading file '%s'" % opencl_path)

    bitcode = fs.path("/tmp/temp.bc")

    host.system(clang_cmd(opencl_path, bitcode))
    raw_output = host.check_output(instcount_cmd(bitcode))
    return parse_instcount(raw_output)
def get_instcount(opencl_path):
    """
    Return the parsed instruction counts for an OpenCL file.

    The command from clang_cmd() is run first, writing to the fixed path
    "/tmp/temp.bc"; the output of the command from instcount_cmd() is
    then handed to parse_instcount().
    """
    io.debug("Reading file '%s'" % opencl_path)

    temp_bitcode = fs.path("/tmp/temp.bc")
    compile_command = clang_cmd(opencl_path, temp_bitcode)
    host.system(compile_command)

    count_command = instcount_cmd(temp_bitcode)
    return parse_instcount(host.check_output(count_command))
Esempio n. 28
0
def echo(*args, **kwargs):
    """
    Write a message to a file.

    Arguments:
        args A list of arguments which make up the message. The last
            argument is the path to the file to write to.
        append (bool, optional): Keyword argument. If true, append to the
            file instead of overwriting it. Any other keyword arguments
            are forwarded to print().
    """
    message = args[:-1]
    destination = fs.path(args[-1])

    if kwargs.pop("append", False):
        mode, target = "a", destination
    else:
        mode, target = "w", fs.path(destination)

    with open(target, mode) as file:
        print(*message, file=file, **kwargs)
Esempio n. 29
0
    def _create_txt(self) -> None:
        """
        Create and cache corpus.txt.

        train() writes to a temporary path inside the cache directory;
        that path is then stored in the cache under "corpus.txt".
        """
        log.debug("creating corpus")

        # TODO: additional options in corpus JSON to accommodate for EOF,
        # different encodings etc.
        staging_path = fs.path(self.cache.path, "corpus.txt.tmp")
        kernels_db = self.contentcache["kernels.db"]
        train(kernels_db, staging_path)
        self.cache["corpus.txt"] = staging_path
Esempio n. 30
0
def cp_loc(path, name):
    """
    Copy database from local filesystem.

    Arguments:
        path: Source path; passed through fs.path() first.
        name: Database name; dst_path(name) gives the destination.
    """
    source = fs.path(path)
    destination = dst_path(name)

    io.info("Copying", source, "->", name)
    fs.cp(source, destination)
Esempio n. 31
0
def echo(*args, **kwargs):
  """
  Write a message to a file.

  Arguments:
      args A list of arguments which make up the message. The last
          argument is the path to the file to write to.
      append (bool, optional): Keyword argument. If true, append to the
          file instead of overwriting it. Any other keyword arguments
          are forwarded to print().
  """
  message = args[:-1]
  destination = fs.path(args[-1])

  if kwargs.pop("append", False):
    mode, target = "a", destination
  else:
    mode, target = "w", fs.path(destination)

  with open(target, mode) as file:
    print(*message, file=file, **kwargs)
Esempio n. 32
0
 def export_model(self, out_basename):
     """
     Export the model as a Graphviz ".dot" file.

     The export is best-effort: a failure does not raise, but it is
     reported on stderr rather than being silently discarded.

     Arguments:
         out_basename: Output path without the ".dot" extension.
     """
     try:
         outfile = fs.path(str(out_basename) + ".dot")
         tree.export_graphviz(self.model, out_file=outfile, # label="none",
                              max_depth=5, filled=True, rounded=True,
                              class_names=["CPU", "GPU"],
                              feature_names=["F1", "F2", "F3", "F4"])
         print("export model to '{}'".format(outfile))
     except Exception as e:
         # Keep going, but leave a trace of what went wrong.
         import sys
         sys.stderr.write("failed to export model '{}': {}\n"
                          .format(out_basename, e))
Esempio n. 33
0
    def keypath(self, key):
        """
        Get the filesystem path for a key.

        Arguments:
            key: Key. It is escaped with escape_key() before being joined
                to the cache path.

        Returns:
            str: Path of the key inside self.path.
        """
        escaped = self.escape_key(key)
        return fs.path(self.path, escaped)
Esempio n. 34
0
    def keypath(self, key: str) -> str:
        """
        Return path to key in cache.

        Arguments:
            key (str): Key. It is mapped with mapkey() and then escaped
                with escape() before being joined to the cache path.

        Returns:
            str: Path of the key inside self.path.
        """
        mapped = self.mapkey(key)
        filename = self.escape(mapped)
        return fs.path(self.path, filename)
Esempio n. 35
0
    def keypath(self, key):
        """
        Get the filesystem path for a key.

        Arguments:
            key: Key. It is escaped with escape_key() before being joined
                to the cache path.

        Returns:
            str: Path of the key inside self.path.
        """
        escaped = self.escape_key(key)
        return fs.path(self.path, escaped)
Esempio n. 36
0
def data_path(*path) -> Path:
    """
    Path to data file.

    The components are joined below "data" and resolved with
    resource_filename() relative to this module.

    Arguments:
        *path (List[str]): Path components.

    Returns:
        Path: Path.
    """
    relative = fs.path("data", *path)
    return resource_filename(__name__, relative)
Esempio n. 37
0
def root_path(*path) -> Path:
    """
    Path relative to dsmith source repository.

    Arguments:
        *path (List[str]): Path components, joined below ROOT.

    Returns:
        Path: Path.
    """
    joined = fs.path(ROOT, *path)
    return joined
Esempio n. 38
0
    def cache(self, model: Model) -> Cache:
        """
        Return sampler cache.

        The cache name is "sampler" joined with the checksum of this
        object's hash concatenated with the model's hash.

        Arguments:
            model (Model): CLgen model.

        Returns:
            Cache: Cache.
        """
        combined_hash = clgen.checksum_str(self.hash + model.hash)
        cache_name = fs.path("sampler", combined_hash)
        return Cache(cache_name)
Esempio n. 39
0
def sql_script(name: str) -> str:
    """
    Read SQL script to string.

    The script is looked up as data/sql/<name>.sql through package_str().

    Arguments:
        name (str): The name of the SQL script (without file extension).

    Returns:
        str: SQL script.
    """
    filename = str(name) + ".sql"
    return package_str(fs.path('data', 'sql', filename))
Esempio n. 40
0
def cachepath(*relative_path_components: str) -> pathlib.Path:
    """Return path to file system cache.

    The cache root is read from the CLGEN_CACHE environment variable and
    falls back to "~/.cache/clgen/". The user-expanded root directory is
    created if it does not exist.

    Args:
      *relative_path_components: Relative path of cache.

    Returns:
      Path built by fs.path() from the cache root and the components.
    """
    configured_root = os.environ.get("CLGEN_CACHE", "~/.cache/clgen/")
    root = pathlib.Path(configured_root)
    root.expanduser().mkdir(parents=True, exist_ok=True)
    # The unexpanded root is what gets handed to fs.path().
    joined = fs.path(root, *relative_path_components)
    return pathlib.Path(joined)
Esempio n. 41
0
def clangformat_ocl(src, id='anon'):
    """
    Format source code by piping it through clang-format.

    Arguments:
        src (str): Source code; sent UTF-8 encoded on stdin.
        id (str, optional): Not used by this function.

    Returns:
        str: The decoded stdout of clang-format.

    Raises:
        ClangFormatException: If clang-format exits with a non-zero
            status; the message is its decoded stderr.
    """
    binary = fs.path(cfg.llvm_path(), "build", "bin", "clang-format")
    style_flag = '-style={}'.format(json.dumps(clangformat_config))
    process = Popen([binary, style_flag], stdin=PIPE, stdout=PIPE,
                    stderr=PIPE, env=cfg.toolchain_env())
    stdout, stderr = process.communicate(src.encode('utf-8'))

    # Anything written to stderr is echoed, even on success.
    if stderr:
        print(stderr.decode('utf-8'))

    if process.returncode == 0:
        return stdout.decode('utf-8')
    raise ClangFormatException(stderr.decode('utf-8'))
Esempio n. 42
0
def make(target="all", dir=".", **kwargs):
    """
    Run make on a target.

    The command is run from within `dir`; the previous working directory
    is restored afterwards, even if running the command raises.

    Arguments:

        target (str, optional): Name of the target to build. Defaults
          to "all".
        dir (str, optional): Path to directory containing Makefile.
        **kwargs (optional): Any additional arguments to be passed to
          system.run(). "timeout" defaults to 300 if not given.

    Returns:

        (int, str, str): The first element is the return code of the
          make command. The second and third elements are the stdout
          and stderr of the process.

    Raises:

        NoMakefileError: In case a Makefile is not found in the target
          directory.
        NoTargetError: In case the Makefile does not support the
          requested target.
        MakeError: In case the target rule fails.
    """
    if not fs.isfile(fs.path(dir, "Makefile")):
        raise NoMakefileError("No makefile in '{}'".format(fs.abspath(dir)))

    # Default parameters to system.run()
    kwargs.setdefault("timeout", 300)

    fs.cd(dir)
    try:
        ret, out, err = system.run(["make", target], **kwargs)
    finally:
        # Always undo the directory change, e.g. when the run times out.
        fs.cdpop()

    if ret > 0:
        # Tell "no such target" apart from a failing rule via stderr.
        if re.search(_BAD_TARGET_RE, err):
            raise NoTargetError("No rule for target '{}'"
                                .format(target))
        raise MakeError("Target '{}' failed".format(target))

    return ret, out, err
Esempio n. 43
0
def run_example_prog(prog, args):
    """
    Run a SkelCL example program.

    The working directory is changed to the program's directory below
    experiment.EXAMPLES_BUILD. If the program returns a non-zero code,
    the command line is appended to /tmp/naughty.txt.

    Arguments:

        prog (str): The name of the program to run
        args (list of str): Any arguments

    Returns:

        The return code reported by system.run().
    """
    build_dir = fs.path(experiment.EXAMPLES_BUILD, prog)
    fs.cd(build_dir)

    cmd = ["./" + prog] + args
    cmd_str = " ".join(cmd)
    io.info("COMMAND:", io.colourise(io.Colours.RED, cmd_str))

    ret, _out, _err = system.run(cmd, stdout=system.STDOUT,
                                 stderr=system.STDERR)
    if not ret:
        return ret

    system.echo(cmd_str, "/tmp/naughty.txt", append=True)
    return ret
Esempio n. 44
0
def run_job(i, n, wgsize, program, args):
    """
    Run one example program with a fixed workgroup size.

    The workgroup size is decoded with unhash_params() and exported
    through OMNITUNE_* environment variables before the program is run
    from its directory below experiment.EXAMPLES_BUILD. A tab-separated
    result line goes to `errlog` on a non-zero return code and to
    `runlog` otherwise.

    Arguments:
        i: Job index, used for the progress message only.
        n: Job count, used for the progress message only.
        wgsize: Workgroup size, as accepted by unhash_params().
        program (str): Name of the program to run.
        args (str): Program arguments as a single string.
    """
    wg_c, wg_r = unhash_params(wgsize)

    # Set environment variable.
    settings = (
        ("OMNITUNE_OFFLINE_TRAINING", "1"),
        ("OMNITUNE_STENCIL_WG_C", str(wg_c)),
        ("OMNITUNE_STENCIL_WG_R", str(wg_r)),
    )
    for variable, value in settings:
        os.environ[variable] = value

    fs.cd(fs.path(experiment.EXAMPLES_BUILD, program))

    cmd_str = "./{} {}".format(program, args.rstrip())
    cmd = cmd_str.split()

    io.info(i, "of", n, " - ", wgsize, "COMMAND:",
            io.colourise(io.Colours.RED, cmd_str))
    ret, _out, _err = system.run(cmd, stdout=system.STDOUT,
                                 stderr=system.STDERR)

    # Failures and successes are recorded in separate logs.
    sink = errlog if ret else runlog
    print(ret, wgsize, program, args, sep="\t", file=sink)
Esempio n. 45
0
def speedup_regression(db):
    """
    Evaluate speedup regressors on the speedup statistics dataset.

    Loads the dataset from ~/data/msc-thesis/csv/speedup_stats.csv and
    runs the evaluation through run_eval() under the name
    "speedup_classification", with "4x4" as the baseline.

    Arguments:
        db: Database handle passed to the dataset loader and evaluation.
    """
    def _rank_fn(predictions):
        # Order by the second element of each prediction, largest first.
        # sorted() takes `reverse`; `reversed` is not an accepted keyword
        # and raises TypeError.
        return sorted(predictions, key=lambda x: x[1], reverse=True)

    path = fs.path("~/data/msc-thesis/csv/speedup_stats.csv")
    dataset = RegressionDataset.load(path, db)

    # db.empty_table("speedup_classification_results")
    baseline = "4x4"

    classifiers = (
        # ml.LinearRegression(),
        ml.RandomForest(),
        # ml.SMOreg(),
        # ml.ZeroR(),
    )

    eval_fn = partial(eval_speedup_regressors, db, classifiers, baseline,
                      _rank_fn, "speedup_classification_results")
    run_eval(db, dataset, eval_fn, "speedup_classification")
Esempio n. 46
0
"""
Configuration.
"""
from itertools import product

import labm8
from labm8 import io
from labm8 import fs
from labm8 import system

# Root of the experiment data; the db/tab/img locations below are
# sub-directories of it.
DATA_ROOT = fs.path("~/data/msc-thesis")
DB_DEST = fs.path(DATA_ROOT, "db")
# Fixed system-wide location of the skelcl database.
ORACLE_PATH = fs.path("/usr/share/omnitune/db/skelcl.db")
TAB_ROOT = fs.path(DATA_ROOT, "tab")
IMG_ROOT = fs.path(DATA_ROOT, "img")

# Path to classifier results using cross-validation.
CLASS_XVAL_PATH = fs.path(DATA_ROOT, "class_xval.json")
# Path to classifier results using synthetic training, real validation.
CLASS_SYN_PATH = fs.path(DATA_ROOT, "class_syn.json")

# Root of the source checkout; the example paths below are relative to it.
SRC_ROOT = fs.path("~/src/msc-thesis")

EXAMPLES_BUILD = fs.path(SRC_ROOT, "skelcl/build/examples/")
EXAMPLES_SRC = fs.path(SRC_ROOT, "skelcl/examples/")

#################################
# Synthetic benchmark arguments #
#################################
# Two argument lists: an empty argument and the "-c" flag.
COMPLEXITIES = ([""], ["-c"])
Esempio n. 47
0
def load(src, loader="weka.core.converters.ArffLoader", **kwargs):
    """
    Load a file through a WekaLoader.

    Arguments:
        src: Path of the file to load; passed through fs.path().
        loader (str, optional): Class name given to WekaLoader.
        **kwargs: Additional WekaLoader arguments.

    Returns:
        The result of load_file(), or None if MODULE_SUPPORTED is false.
    """
    if not MODULE_SUPPORTED:
        return None
    weka_loader = WekaLoader(classname=loader, **kwargs)
    return weka_loader.load_file(fs.path(src))
Esempio n. 48
0
def save(data, dst, saver="weka.core.converters.ArffSaver", **kwargs):
    """
    Save data to a file through a WekaSaver.

    Does nothing if MODULE_SUPPORTED is false.

    Arguments:
        data: The data to save.
        dst: Destination path; passed through fs.path().
        saver (str, optional): Class name given to WekaSaver.
        **kwargs: Additional WekaSaver arguments.
    """
    if not MODULE_SUPPORTED:
        return
    weka_saver = WekaSaver(classname=saver, **kwargs)
    destination = fs.path(dst)
    weka_saver.save_file(data, destination)
Esempio n. 49
0
import re
import subprocess

import labm8
from labm8 import fs
from labm8 import io

# Matches one "<count> instcount - Number of <type>" line. A raw string is
# used so that "\d" reaches the regex engine instead of being an invalid
# string escape sequence.
_LINE_RE = re.compile(r"^(?P<count>\d+) instcount - Number of (?P<type>.+)")

# Default LLVM binary directory, inside the skelcl checkout in the user's
# home directory.
DEFAULT_LLVM_PATH = fs.path("~/src/msc-thesis/skelcl/libraries/llvm/build/bin/")


class Error(Exception):
    """Base class for errors of the LLVM module."""


class ProgramNotFoundError(Error):
    """Raised when a program is not found."""


class ClangError(Error):
    """Raised when clang exits with non-zero status."""
Esempio n. 50
0
import itertools

import labm8
from labm8 import fs

# Root directory of this scrap, inside the user's home directory.
ROOT = fs.path("~/src/msc-thesis/scraps/05-20")
SkelCL benchmark applications.
"""


import json
import os
import re
import subprocess
import sys

import labm8 as lab
from labm8 import fs
from labm8 import io
from labm8 import host

# Paths to the clang and opt binaries.
# NOTE(review): both are hard-coded to one user's home directory; confirm
# before running on another machine.
CLANG = fs.path("/home/chris/src/msc-thesis/llvm/Release+Asserts/bin/clang")
OPT = fs.path("/home/chris/src/msc-thesis/llvm/Release+Asserts/bin/opt")

def print_help():
    """Print the usage message, naming the script as sys.argv[0]."""
    usage_lines = (
        "Get instruction counts for an OpenCL source file.",
        "",
        "    Usage: %s <kernel>.cl" % sys.argv[0],
        "",
        "Accepts a path to an OpenCL file, and compiles to LLVM bytecode.",
        "The LLVM InstCount pass is then performed on this bytecode, and the",
        "results printed and returned.",
    )
    print("\n".join(usage_lines))

def clang_cmd(opencl_input, bitcode_output):
    return [CLANG,
            "-Dcl_clang_storage_class_specifiers",
            "-isystem", "libclc/generic/include",
Esempio n. 52
0
 def test_path(self):
     """fs.path() joins its components with a slash."""
     cases = (
         ("foo/bar", ("foo", "bar")),
         ("foo/bar/car", ("foo/bar", "car")),
     )
     for expected, components in cases:
         self._test(expected, fs.path(*components))
Esempio n. 53
0
def toolchain_env():
    """Return an environment dict pointing LD_LIBRARY_PATH at the LLVM
    build's lib directory."""
    lib_dir = fs.path(llvm_path(), "build", "lib")
    return dict(LD_LIBRARY_PATH=lib_dir)
Esempio n. 54
0
 def test_path_homedir(self):
     """fs.path() expands a leading "~" to the user's home directory."""
     expected_home = os.path.expanduser("~")
     self._test(expected_home, fs.path("~"))

     expected_child = os.path.join(os.path.expanduser("~"), "foo")
     self._test(expected_child, fs.path("~", "foo"))
Esempio n. 55
0
def dst_path(name):
    """
    Get destination path for named database.

    The result is "<name>.db" inside experiment.DB_DEST.
    """
    filename = name + ".db"
    return fs.path(experiment.DB_DEST, filename)
Esempio n. 56
0
import itertools

import labm8
from labm8 import io
from labm8 import fs
from labm8 import system


# Root of the source checkout; every path below is relative to it.
ROOT = fs.path("~/src/msc-thesis")

# SimpleBig example: build directory and the binary inside it.
SIMPLEBIG_BUILD = fs.path(ROOT, "skelcl/build/examples/SimpleBig")
SIMPLEBIG_BUILD_BIN = fs.path(SIMPLEBIG_BUILD, "SimpleBig")

# SimpleBig example: source directory and its main.cpp.
SIMPLEBIG_SRC = fs.path(ROOT, "skelcl/examples/SimpleBig")
SIMPLEBIG_SRC_HOST = fs.path(SIMPLEBIG_SRC, "main.cpp")

# Database files, one per "omnitune.skelcl.<name>.db" entry.
DATABASE_ROOT = fs.path(ROOT, "data")
DATABASES = [
    fs.path(DATABASE_ROOT, "omnitune.skelcl.cec.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.dhcp-90-060.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.florence.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.monza.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.tim.db"),
    fs.path(DATABASE_ROOT, "omnitune.skelcl.whz5.db")
]
# File name only; unlike the entries above it is not joined to
# DATABASE_ROOT.
DATABASE_ORACLE = "omnitune.oracle.db"

## Arguments

# Two argument lists: an empty argument and the "-c" flag.
COMPLEXITIES = ([""], ["-c"])
Esempio n. 57
0
def main():
    """
    Regenerate all plots and tables from the oracle database.

    Opens the database at experiment.ORACLE_PATH, clears the previous
    contents of experiment.IMG_ROOT and experiment.TAB_ROOT, recreates the
    output directory layout, and then renders, in order:

    * summary pie charts,
    * per-scenario heatmaps, 3D bar plots and trisurf plots (recording
      refused parameters in the ``refused_params`` table along the way),
    * classification and regression job visualisations,
    * whole-dataset plots,
    * per-device, per-kernel and per-dataset coverage / safety / oracle plots.

    The ML backend is started before any work and stopped at the end.
    """
    db = _db.Database(experiment.ORACLE_PATH)
    ml.start()

    # Delete any old stuff.
    fs.rm(experiment.IMG_ROOT + "/*")
    fs.rm(experiment.TAB_ROOT + "/*")

    # Make directories
    fs.mkdir(experiment.TAB_ROOT)
    image_subdirs = (
        "scenarios/bars",
        "scenarios/heatmap",
        "scenarios/trisurf",
        "coverage/devices",
        "coverage/kernels",
        "coverage/datasets",
        "safety/devices",
        "safety/kernels",
        "safety/datasets",
        "oracle/devices",
        "oracle/kernels",
        "oracle/datasets",
    )
    for subdir in image_subdirs:
        fs.mkdir(fs.path(experiment.IMG_ROOT, subdir))

    # Summary pie charts. The misspelt "sceanrios" file names are kept
    # as-is, since other documents may reference the generated files.
    visualise.pie(db.num_scenarios_by_device,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_device"))
    visualise.pie(db.num_runtime_stats_by_device,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_device"))
    visualise.pie(db.num_scenarios_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_sceanrios_by_dataset"))
    visualise.pie(db.num_runtime_stats_by_dataset,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_dataset"))
    # This chart used to be rendered twice in a row to the same path; one
    # call produces the same file.
    visualise.pie(db.num_runtime_stats_by_kernel,
                  fs.path(experiment.IMG_ROOT, "num_runtime_stats_by_kernel"))

    # Per-scenario plots
    for row in db.scenario_properties:
        scenario,device,kernel,north,south,east,west,max_wgsize,width,height,tout = row
        title = ("{device}: {kernel}[{n},{s},{e},{w}]\n"
                 "{width} x {height} {type}s"
                 .format(device=text.truncate(device, 18), kernel=kernel,
                         n=north, s=south, e=east, w=west,
                         width=width, height=height, type=tout))
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/heatmap/{id}.png".format(id=scenario))
        space = _space.ParamSpace.from_dict(db.perf_scenario(scenario))
        # Cap the plotted space at 25 columns by 25 rows.
        max_c = min(25, len(space.c))
        max_r = min(25, len(space.r))
        space.reshape(max_c=max_c, max_r=max_r)

        # Heatmaps.
        # The mask marks each zero-valued cell of the space as illegal (-1)
        # or refused (1).
        mask = _space.ParamSpace(space.c, space.r)
        for j in range(len(mask.r)):
            for i in range(len(mask.c)):
                if space.matrix[j][i] == 0:
                    r, c = space.r[j], space.c[i]
                    # TODO: Get values from refused_params table.
                    if r * c >= max_wgsize:
                        # Illegal
                        mask.matrix[j][i] = -1
                    else:
                        # Refused
                        db.execute("INSERT OR IGNORE INTO refused_params VALUES(?,?)",
                                   (scenario, hash_params(c, r)))
                        space.matrix[j][i] = -1
                        mask.matrix[j][i] = 1

        db.commit()
        # Reverse the row order for plotting; the y tick labels below are
        # reversed to match.
        new_order = list(reversed(range(space.matrix.shape[0])))
        data = space.matrix[:][new_order]

        figsize=(12,6)

        _, ax = plt.subplots(1, 2, figsize=figsize, sharey=True)
        sns.heatmap(data, ax=ax[0], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        ax[0].set_title(title)

        new_order = list(reversed(range(mask.matrix.shape[0])))
        data = mask.matrix[:][new_order]

        sns.heatmap(data, ax=ax[1], vmin=-1, vmax=1,
                    xticklabels=space.c,
                    yticklabels=list(reversed(space.r)), square=True)

        # Set labels.
        ax[0].set_ylabel("Rows")
        ax[0].set_xlabel("Columns")
        ax[1].set_ylabel("Rows")
        ax[1].set_xlabel("Columns")

        # plt.tight_layout()
        # plt.gcf().set_size_inches(*figsize, dpi=300)

        viz.finalise(output)

        # 3D bars.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/bars/{id}.png".format(id=scenario))
        space.bar3d(output=output, title=title, zlabel="Performance",
                    rotation=45)

        # Trisurfs.
        output = fs.path(experiment.IMG_ROOT,
                         "scenarios/trisurf/{id}.png".format(id=scenario))
        space.trisurf(output=output, title=title, zlabel="Performance",
                      rotation=45)

    #####################
    # ML Visualisations #
    #####################
    #features_tab(db, experiment.TAB_ROOT)

    visualise_classification_job(db, "xval")
    visualise_classification_job(db, "arch")
    visualise_classification_job(db, "xval_real")
    visualise_classification_job(db, "synthetic_real")

    # Runtime regression accuracy.
    visualise_regression_job(db, "xval")
    visualise_regression_job(db, "arch")
    visualise_regression_job(db, "xval_real")
    visualise_regression_job(db, "synthetic_real")

    # Whole-dataset plots
    visualise.runtimes_variance(db, fs.path(experiment.IMG_ROOT,
                                            "runtime_variance.png"),
                                min_samples=30)
    visualise.num_samples(db, fs.path(experiment.IMG_ROOT,
                                      "num_samples.png"))
    visualise.runtimes_range(db, fs.path(experiment.IMG_ROOT,
                                         "runtimes_range.png"))
    visualise.max_speedups(db, fs.path(experiment.IMG_ROOT,
                                       "max_speedups.png"))
    visualise.kernel_performance(db, fs.path(experiment.IMG_ROOT,
                                             "kernel_performance.png"))
    visualise.device_performance(db, fs.path(experiment.IMG_ROOT,
                                             "device_performance.png"))
    visualise.dataset_performance(db, fs.path(experiment.IMG_ROOT,
                                              "dataset_performance.png"))
    visualise.num_params_vs_accuracy(db, fs.path(experiment.IMG_ROOT,
                                                 "num_params_vs_accuracy.png"))
    visualise.performance_vs_coverage(db,
                                      fs.path(experiment.IMG_ROOT,
                                              "performance_vs_coverage.png"))
    visualise.performance_vs_max_wgsize(
        db, fs.path(experiment.IMG_ROOT, "performance_vs_max_wgsize.png")
    )
    visualise.performance_vs_wgsize(db, fs.path(experiment.IMG_ROOT,
                                                "performance_vs_wgsize.png"))
    visualise.performance_vs_wg_c(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_c.png"))
    visualise.performance_vs_wg_r(db, fs.path(experiment.IMG_ROOT,
                                              "performance_vs_wg_r.png"))
    visualise.max_wgsizes(db, fs.path(experiment.IMG_ROOT, "max_wgsizes.png"))
    visualise.oracle_speedups(db, fs.path(experiment.IMG_ROOT,
                                          "oracle_speedups.png"))

    visualise.coverage(db,
                       fs.path(experiment.IMG_ROOT, "coverage/coverage.png"))
    visualise.safety(db, fs.path(experiment.IMG_ROOT, "safety/safety.png"))
    visualise.oracle_wgsizes(db, fs.path(experiment.IMG_ROOT, "oracle/all.png"))

    # Per-device plots
    # Output files are named by enumeration index; the device name is only
    # used in the SQL filter and the plot title.
    for i,device in enumerate(db.devices):
        # All kernels on this device.
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE device='{0}')"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}.png".format(i))
        visualise.coverage(db, output=output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=device)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where, title=device)

        # Kernels with synthetic=0 only.
        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=0\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_real.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_real.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", real")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_real.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", real")

        # Kernels with synthetic=1 only.
        where = ("scenario IN (\n"
                 "    SELECT id from scenarios WHERE device='{0}'\n"
                 ") AND scenario IN (\n"
                 "    SELECT id FROM scenarios WHERE kernel IN (\n"
                 "        SELECT id FROM kernel_names WHERE synthetic=1\n"
                 "    )\n"
                 ")"
                 .format(device))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/devices/{0}_synthetic.png".format(i))
        visualise.coverage(db, output=output, where=where,
                           title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "safety/devices/{0}_synthetic.png".format(i))
        visualise.safety(db, output, where=where,
                         title=device + ", synthetic")
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/devices/{0}_synthetic.png".format(i))
        visualise.oracle_wgsizes(db, output, where=where,
                                 title=device + ", synthetic")

    # Per-kernel plots
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for kernel,ids in db.lookup_named_kernels().items():
        id_wrapped = ['"' + kernel_id + '"' for kernel_id in ids]
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE kernel IN ({0}))"
                 .format(",".join(id_wrapped)))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/kernels/{0}.png".format(kernel))
        visualise.coverage(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/kernels/{0}.png".format(kernel))
        visualise.safety(db, output=output, where=where, title=kernel)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/kernels/{0}.png".format(kernel))
        # Oracle plot, as in the per-device loop; this previously called
        # visualise.safety and so wrote a second safety plot here.
        visualise.oracle_wgsizes(db, output, where=where, title=kernel)

    # Per-dataset plots
    for i,dataset in enumerate(db.datasets):
        where = ("scenario IN "
                 "(SELECT id from scenarios WHERE dataset='{0}')"
                 .format(dataset))
        output = fs.path(experiment.IMG_ROOT,
                         "coverage/datasets/{0}.png".format(i))
        visualise.coverage(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "safety/datasets/{0}.png".format(i))
        visualise.safety(db, output, where=where, title=dataset)
        output = fs.path(experiment.IMG_ROOT,
                         "oracle/datasets/{0}.png".format(i))
        # Oracle plot, as in the per-device loop; this previously called
        # visualise.safety and so wrote a second safety plot here.
        visualise.oracle_wgsizes(db, output, where=where, title=dataset)

    ml.stop()
Esempio n. 58
0
def assert_exists(*path_components, **kwargs):
    """
    Require that a path exists, and return it.

    :param path_components: Components joined with fs.path() to form the
        path to check.
    :param kwargs: May contain "exception", the exception class to raise
        on failure. Defaults to SmithException.
    :return: The joined path.
    :raises: The selected exception class if the path does not exist.
    """
    target = fs.path(*path_components)
    if os.path.exists(target):
        return target

    error_type = kwargs.get("exception", SmithException)
    raise error_type("path '{}' does not exist".format(target))
Esempio n. 59
0
def visualise_classification_job(db, job):
    """
    Render the plots and the LaTeX results table for one classification job.

    Plots are written beneath "img/classification/<job>/", and the table to
    "<job>.tex" inside experiment.TAB_ROOT.

    :param db: Database holding the classification results.
    :param job: Name of the classification job to visualise.
    """
    job_dir = "img/classification/{}/".format(job)

    # The job directory itself, then its two sub-directories.
    for subdir in ("", "classifiers", "err_fns"):
        fs.mkdir(job_dir + subdir)

    visualise.err_fn_performance(db, job_dir + "err_fns.png", job=job)

    # Bar plot of all results.
    overview_path = "img/classification/{}.png".format(job)
    visualise.classification(db, overview_path, job=job)

    # Per-classifier plots, named by enumeration index.
    for index, classifier in enumerate(db.classification_classifiers):
        plot_path = job_dir + "classifiers/{}.png".format(index)
        visualise.classifier_speedups(db, classifier, plot_path, job=job)

    # Per-err_fn plots, named after the error function.
    for err_fn in db.err_fns:
        plot_path = job_dir + "err_fns/{}.png".format(err_fn)
        visualise.err_fn_speedups(db, err_fn, plot_path, job=job, sort=True)

    # Results table: one row per (classifier, err_fn) pair.
    groups = db.execute(
        "SELECT classifier,err_fn,Count(*) AS count\n"
        "FROM classification_results\n"
        "WHERE job=? GROUP BY classifier,err_fn",
        (job,)
    )
    table = []
    for classifier, err_fn, count in groups:
        group_rows = db.execute(
            "SELECT correct,illegal,refused,performance,speedup\n"
            "FROM classification_results\n"
            "WHERE job=? AND classifier=? AND err_fn=?",
            (job, classifier, err_fn)
        )
        # Transpose the rows into one sequence per column.
        correct, illegal, refused, performance, speedup = zip(*group_rows)

        table.append([
            classifier,
            err_fn,
            (sum(correct) / count) * 100,
            (sum(illegal) / count) * 100,
            (sum(refused) / count) * 100,
            min(performance) * 100,
            labmath.geomean(performance) * 100,
            max(performance) * 100,
            min(speedup),
            labmath.geomean(speedup),
            max(speedup)
        ])

    # Swap each classifier identifier for its basename.
    for entry in table:
        entry[0] = ml.classifier_basename(entry[0])

    headings = (
        "CLASSIFIER",
        "ERR_FN",
        "ACC %",
        "INV %",
        "REF %",
        "Omin %",
        "Oavg %",
        "Omax %",
        "Smin",
        "Savg",
        "Smax",
    )

    # Floats are formatted to two decimal places.
    latex.table(table, output=fs.path(experiment.TAB_ROOT, job + ".tex"),
                columns=headings, float_format="{:.2f}".format)