Example no. 1
import sys

from labm8 import fs

import clgen
from clgen import corpus, log, model, sampler
# (imports implied by the calls below; evaluate() is defined elsewhere in
# this script)


def main():
    log.init(verbose=True)

    # Load the model specification named on the command line and a GitHub
    # corpus, then report the CLgen version and corpus statistics.
    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            continue

        _, prototype = row
        # Keep only the type of each kernel argument, dropping the parameter
        # name (the last whitespace-separated token).
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
Example no. 2
    def meta(self) -> dict:
        """
        Get trained model metadata.

        Format spec: https://github.com/ChrisCummins/clgen/issues/25

        Returns:
            dict: Metadata.
        """
        # checksum corpus and model cache files. Paths are relative to cache
        # root.
        cache_root_re = r'^' + cache.ROOT + '/'
        corpus_files = dict(
            (re.sub(cache_root_re, "", x), clgen.checksum_file(x))
            for x in fs.ls(self.corpus.cache.path, abspaths=True))
        model_files = dict(
            (re.sub(cache_root_re, "", x), clgen.checksum_file(x))
            for x in fs.ls(self.cache.path, abspaths=True))

        contents = corpus_files.copy()
        contents.update(model_files)

        _meta = deepcopy(self.opts)
        _meta["version"] = clgen.version()
        _meta["date_packaged"] = labtime.nowstr()
        _meta["corpus"] = self.corpus.meta,
        _meta["contents"] = contents

        return _meta
Example no. 3
def get_all_sampler_datasets(all_clgen_versions: bool=True) -> list:
    """
    Return paths to all cached sampler kernel databases (kernels.db).

    If all_clgen_versions is True, databases produced by every CLgen version
    found in the cache are returned; otherwise only the current version's.
    """
    if all_clgen_versions:
        versiondirs = fs.ls(fs.path("~/.cache/clgen"), abspaths=True)
    else:
        versiondirs = [fs.path("~/.cache/clgen", clgen.version())]

    versiondirs = [v for v in versiondirs if fs.isdir(v, "sampler")]

    datasets = []
    for versiondir in versiondirs:
        for samplerdir in fs.ls(fs.path(versiondir, "sampler"), abspaths=True):
            inpath = fs.path(samplerdir, "kernels.db")
            if fs.isfile(inpath):
                datasets.append(inpath)

    return datasets
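
A minimal usage sketch (not part of the excerpt above), assuming get_all_sampler_datasets() is importable as shown:

for db_path in get_all_sampler_datasets():
    # Each entry is the path to a cached sampler kernels.db file.
    print(db_path)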
Example no. 4
def set_version_meta(path: str, version: str=clgen.version()) -> None:
    """
    Set the "version" key in an database.

    This is useful for marking version requirements of specific datasets, e.g.
    a databse schema which requires a particular CLgen version, or a scheme
    which is likely to change in the future.

    Parameters
    ----------
    path : str
        Path to database.
    version : str, optional
        Version value (defaults to CLgen version).
    """
    set_meta(path, "version", version)
Example no. 5
def version_meta_matches(path: str, version: str=clgen.version()) -> bool:
    """
    Check that the "version" key in a database matches the expected value.

    If the database does not have a "version" key in the Meta table, returns
    False.

    Parameters
    ----------
    path : str
        Path to database.
    version : str, optional
        Version value (defaults to CLgen version).

    Returns
    -------
    bool
        True if version in database matches expected version, else False.
    """
    return get_meta(path, "version") == version
Example no. 6
def main():
    log.init(verbose=True)

    m = model.from_json(clgen.load_json_file(sys.argv[1]))
    c = corpus.Corpus.from_json({"path": "~/data/github"})
    print("CLgen:      ", clgen.version())
    print("Corpus size:", c.size)
    print("Vocab size: ", c.vocab_size)

    m.train()

    p, _ = corpus.most_common_prototypes(c, 20)
    for i, row in enumerate(p):
        outpath = "./inference-p" + str(i + 1) + "-" + fs.basename(sys.argv[1])
        if fs.exists(outpath):
            print("skipped result for", outpath)
            continue
        else:
            print("starting result for", outpath)

        _, prototype = row
        argspec = [' '.join(x.split()[:-1]) for x in prototype.split(',')]
        print("argspec", ','.join([str(x) for x in argspec]))
        s = sampler.from_json({
            "kernels": {
                "args": argspec,
                "max_length": 5000
            },
            "sampler": {
                "batch_size": 2000,
                "max_batches": 1,
                "static_checker": False,
                "dynamic_checker": False
            }
        })

        info = evaluate(m, s)
        clgen.write_file(outpath, clgen.format_json(info))
Example no. 7
from datetime import datetime  # used by DEFAULT_CORPUS_OPTS below
from time import time
from typing import Iterable, List, Tuple

import clgen
from clgen import dbutil
from clgen import features
from clgen import log


# Default options used for corpus. Any values provided by the user will override
# these defaults.
DEFAULT_CORPUS_OPTS = {
    "created": {
        "author": clgen.get_default_author(),
        "date": str(datetime.now()),
        "version": clgen.version(),
    },
    "eof": False,
    "batch_size": 50,
    "seq_length": 50,
    "vocabulary": "char",
    "encoding": "default",
    "preprocess": True,
    "preserve_order": False,
    "language": None,   # Note no explicit default language.
}


class FeaturesError(clgen.CLgenError):
    """
    Thrown in case of error during features encoding.
    """
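
The comment above DEFAULT_CORPUS_OPTS says that user-supplied values override these defaults. A minimal sketch of one way such an override could be applied; the helper name and the shallow-merge semantics are assumptions rather than clgen's confirmed behaviour (nested keys such as "created" may be merged differently):

def with_corpus_defaults(user_opts: dict) -> dict:
    """Hypothetical helper: overlay user options onto DEFAULT_CORPUS_OPTS."""
    opts = dict(DEFAULT_CORPUS_OPTS)  # start from the defaults above
    opts.update(user_opts)            # user-provided values win
    return opts

# e.g. with_corpus_defaults({"batch_size": 128, "language": "opencl"})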
Example no. 8
def print_version_and_exit():
    """
    Print the clgen version. This function does not return.
    """
    print("clgen ", clgen.version())
    exit(0)
Example no. 9
# You should have received a copy of the GNU General Public License
# along with CLgen.  If not, see <http://www.gnu.org/licenses/>.
#
"""
CLgen persistent cache mechanism.
"""
import re

from labm8 import fs
from shutil import move
from six import string_types

import clgen
from clgen import log

# The cache root is version-specific: ~/.cache/clgen/<version>.
ROOT = fs.path("~", ".cache", "clgen", clgen.version())


class Cache404(clgen.File404):
    """
    Error thrown for cache misses.
    """
    pass


class Cache(clgen.CLgenObject):
    """
    Persistent filesystem cache.
    """
    def __init__(self, name: str):
        """
Example no. 10
def main(self, args: List[str]=sys.argv[1:]):
    """
    A deep learning program generator for the OpenCL programming language.

    The core operations of CLgen are:

       1. OpenCL files are collected from a model specification file.
       2. These files are preprocessed into an OpenCL kernel database.
       3. A training corpus is generated from the input files.
       4. A machine learning model is trained on the corpus of files.
       5. The trained model is sampled for new kernels.
       6. The samples are tested for compilability.

    This program automates the execution of all six stages of the pipeline.
    The pipeline can be interrupted and resumed at any time. Results are cached
    across runs. If installed with CUDA support, NVIDIA GPUs will be used to
    improve performance where possible.
    """
    parser = ArgumentParser(
        prog="clgen",
        description=inspect.getdoc(self),
        epilog="""
For information about a specific command, run `clgen <command> --help`.

""" + __help_epilog__,
        formatter_class=RawDescriptionHelpFormatter)

    # TODO:
    # parser.add_argument(
    #     "-l", "--lang", metavar="<language>",
    #     help="programming language (default: OpenCL)")
    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="increase output verbosity")
    parser.add_argument(
        "--version", action="store_true",
        help="show version information and exit")
    parser.add_argument(
        "--debug", action="store_true",
        help="in case of error, print debugging information")
    parser.add_argument(
        "--profile", action="store_true",
        help=("enable internal API profiling. When combined with --verbose, "
              "prints a complete profiling trace"))

    parser.add_argument(
        "--corpus-dir", metavar="<corpus>",
        type=FileType("r"),
        help="print path to corpus cache")
    parser.add_argument(
        "--model-dir", metavar="<model>",
        type=FileType("r"),
        help="print path to model cache")
    parser.add_argument(
        "--sampler-dir", metavar=("<model>", "<sampler>"),
        type=FileType("r"), nargs=2,
        help="print path to sampler cache")

    subparser = parser.add_subparsers(title="available commands")

    subparsers = [
        _register_test_parser,
        _register_train_parser,
        _register_sample_parser,
        _register_db_parser,
        _register_fetch_parser,
        _register_ls_parser,
        _register_preprocess_parser,
        _register_features_parser,
        _register_atomize_parser,
        _register_cache_parser,
    ]

    for register_fn in subparsers:
        register_fn(subparser)

    args = parser.parse_args(args)

    # set log level
    log.init(args.verbose)

    # set debug option
    if args.debug:
        os.environ["DEBUG"] = "1"

    # set profile option
    if args.profile:
        prof.enable()

    # options which override the normal argument parsing process.
    if args.version:
        version = clgen.version()
        print(f"clgen {version} made with \033[1;31m♥\033[0;0m by "
              "Chris Cummins <*****@*****.**>.")
    elif args.corpus_dir:
        model = clgen.Model.from_json(jsonutil.loads(args.corpus_dir.read()))
        print(model.corpus.cache.path)
    elif args.model_dir:
        model = clgen.Model.from_json(jsonutil.loads(args.model_dir.read()))
        print(model.cache.path)
    elif args.sampler_dir:
        model = clgen.Model.from_json(jsonutil.loads(args.sampler_dir[0].read()))
        sampler = clgen.Sampler.from_json(jsonutil.loads(args.sampler_dir[1].read()))
        print(sampler.cache(model).path)
    else:
        # strip the arguments from the top-level parser
        dispatch_func = args.dispatch_func
        opts = vars(args)
        del opts["version"]
        del opts["verbose"]
        del opts["debug"]
        del opts["profile"]
        del opts["corpus_dir"]
        del opts["model_dir"]
        del opts["sampler_dir"]
        del opts["dispatch_func"]

        run(dispatch_func, **opts)