예제 #1
0
    def save_all(self, outdir, force=False, fmt=None):
        """Write the results of every extractor below an output root.

        Results go to <outdir>/<manager name>/<manager uri>. When fmt is not
        given, each extractor's own default format (its "extractor"
        attribute) is used instead.

        Arguments:
         - outdir: root directory to write the package results under
         - force: passed on to the extractor save function
         - fmt: an export format known to the manager, or None
        """
        manager = self.manager
        if not (manager and self._extractors):
            logger.exit(
                "You must add a manager and do an extract() before save.")

        if fmt and fmt not in manager.export_formats:
            choices = ", ".join(manager.export_formats)
            logger.exit("Export format %s is not recognized. Choose %s." %
                        (fmt, choices))

        package_dir = os.path.join(outdir, manager.name, manager.uri)
        logger.info("Results will be written to %s" % package_dir)

        for extractor in self._extractors.values():

            # An explicit format wins, otherwise the extractor decides
            chosen = fmt or extractor.extractor

            # Anything that is not zip or json-single is saved as json
            if chosen == "zip":
                save = extractor.save_zip
            elif chosen == "json-single":
                save = extractor.save_json_single
            else:
                save = extractor.save_json
            save(package_dir, force=force)
예제 #2
0
    def load_metric(
        self,
        metric,
        filename=None,
        local_repository=None,
        repository="vsoch/caliper-metrics",
        subfolder="",
        branch="main",
        extension="json",
    ):
        """Load previously extracted results for a metric.

        The source is chosen in this order: a local caliper repository if
        local_repository is given, then a file if filename is given, and
        otherwise a remote repository described by repository, subfolder,
        branch and extension.

        Returns whatever the selected loader returns.
        """
        # Nothing can be loaded without a manager
        if not self.manager:
            logger.exit("A manager is required to load a metric for.")

        if local_repository:
            loaded = self._load_metric_local(local_repository, metric)
        elif filename:
            loaded = self._load_metric_file(filename, metric)
        else:
            loaded = self._load_metric_repo(metric, repository, subfolder,
                                            branch, extension)
        return loaded
예제 #3
0
 def run_command(self, cmd):
     """Run cmd (a list of arguments) with the module level run_command.

     The joined command is logged at debug level. A return code other than
     zero is reported through logger.exit together with the output lines.
     Returns the "lines" entry of the response.
     """
     command_line = " ".join(cmd)
     logger.debug(command_line)
     result = run_command(cmd, quiet=self.quiet)
     if result["return_code"] != 0:
         logger.exit("Error with %s, %s" % (command_line, result["lines"]))
     return result["lines"]
예제 #4
0
def wget_and_extract(url,
                     download_to,
                     download_type="targz",
                     chunk_size=1024,
                     flatten=True):
    """Download an archive from url and extract it.

    The download_type selects the helper: "targz" uses
    wget_and_extract_targz, while "wheel", "gzip" and "zip" all use
    wget_and_extract_zip. Any other type is reported with logger.exit.

    After extraction the archive file is deleted. If flatten is true the
    contents of the extracted root folder are moved into the destination
    directory, and the root folder is removed if it still exists.

    Returns the destination directory reported by the helper.
    """
    if download_type == "targz":
        extract = wget_and_extract_targz
    elif download_type in ("wheel", "gzip", "zip"):
        extract = wget_and_extract_zip
    else:
        logger.exit("%s is not a known archive type." % download_type)

    dest, root, dest_dir = extract(url, download_to, chunk_size)

    # The archive itself is no longer needed
    if os.path.exists(dest):
        os.remove(dest)

    # Bring the contents of the top level folder up one level
    if flatten:
        if os.path.exists(root):
            move_files(root, dest_dir)

    # Checked again, since the move above may already have removed it
    if os.path.exists(root):
        shutil.rmtree(root)
    return dest_dir
예제 #5
0
 def get_analyzer(self):
     """Return the analyzer matching the "packagemanager" config field.

     Only pypi is handled: if the field contains "pypi" (case insensitive)
     a CaliperPypiAnalyzer is created from the config file. Any other
     value is reported through logger.exit.
     """
     manager_name = self.config["packagemanager"]
     if re.search("pypi", manager_name, re.IGNORECASE):
         return CaliperPypiAnalyzer(self.config_file)

     message = "%s is not a supported package manager at this time."
     logger.exit(message % manager_name)
예제 #6
0
    def extract_metric(self, name, versions=None):
        """Run the extraction for one named metric and keep the extractor.

        A repository is prepared first (limited to versions, if given) when
        self.git is not set yet. The extractor is stored in self._extractors
        under the last dotted component of its entry in self._metrics.
        """
        versions = [] if not versions else versions
        if name not in self.metrics:
            logger.exit("Metric %s is not known." % name)

        # The metric needs a git repository to work from
        if not self.git:
            self.prepare_repository(versions)

        # Only the final component of the dotted path is used as the key
        _module_path, short_name = self._metrics[name].rsplit(".", 1)
        extractor = self.get_metric(name)
        extractor.extract()
        self._extractors[short_name] = extractor
예제 #7
0
    def prepare_repository(self, versions=None):
        """Create a temporary git repository with one tagged commit per version.

        Since most source code archives won't include the git history, we
        create a new root directory with a fresh git repository, and then
        create tagged commits that correspond to each version. We can then
        use this git repository to derive metrics of change.

        Arguments:
         - versions: optional list of versions handed to filter_versions

        Sets self.tmpdir and self.git, and returns self.git.
        """
        versions = versions or []
        if not self.manager:
            logger.exit("A manager is required to prepare a repository.")

        # Create temporary git directory, named after the manager uri
        prefix = "%s-" % self.manager.uri.replace("/", "-")
        self.tmpdir = tempfile.mkdtemp(prefix=prefix)
        self.git = GitManager(self.tmpdir, quiet=self.quiet)

        # Initialize empty repository
        self.git.init()

        # If we have versions, filter down
        self.filter_versions(versions)

        # For each version, download and create git commit and tag
        specs = self.manager.specs
        total = len(specs)
        for count, spec in enumerate(specs, start=1):
            version = spec["version"]
            source = spec["source"]

            logger.info("Downloading and tagging %s, %s of %s" %
                        (version, count, total))
            download_to = os.path.join(self.tmpdir,
                                       os.path.basename(source["filename"]))

            # Extraction type is based on source type
            wget_and_extract(
                url=source["filename"],
                download_type=source["type"],
                download_to=download_to,
            )

            # git add all content in folder, commit and tag with version.
            # (A leftover os.listdir call whose result was discarded used
            # to sit here; it had no effect and has been dropped.)
            self.git.add()
            self.git.status()
            self.git.commit(version)
            self.git.tag(version)

        logger.info("Repository for %s is created at %s" %
                    (self.manager, self.tmpdir))
        return self.git
예제 #8
0
def main(args, extra):
    """Extract the requested metrics for each package and save the results.

    Arguments:
     - args: parsed arguments; this function reads metric, outdir,
       packages, versions, force, fmt and no_cleanup.
     - extra: not used by this function.

    Each entry of args.packages is expected as <manager>:<name>, for
    example pypi:sif. Only the first colon separates the two parts.
    """
    # Ensure that all metrics are valid
    client = MetricsExtractor(quiet=True)
    metrics = args.metric.split(",")

    # If asking for all, we will do all regardless of other specifications
    if "all" in metrics:
        metrics = ["all"]

    for metric in metrics:
        if metric != "all" and metric not in client.metrics:
            logger.exit("%s is not a known metric." % metric)

    # prepare top level output directory
    outdir = args.outdir or os.getcwd()

    # Honor the args.versions (the same for every package, so parse once)
    versions = args.versions.split(",") if args.versions else None

    # Now parse the package names and do the extraction!
    for package in args.packages:

        # partition never raises, so a malformed name gets a clear message
        # instead of an unpacking ValueError
        uri, separator, name = package.partition(":")
        if not (separator and uri and name):
            logger.exit(
                "%s is not a valid package, expected <manager>:<name> "
                "(e.g., pypi:sif)." % package)

        try:
            manager = get_named_manager(uri, name)
        except NotImplementedError:
            # Report the manager uri (not the package name) as the problem
            logger.exit("%s is not a valid package manager uri." % uri)

        # Create a client to interact with
        client = MetricsExtractor(manager, quiet=True)

        # Do the extraction
        for metric in metrics:
            if metric == "all":
                client.extract_all(versions=versions)
            else:
                client.extract_metric(metric, versions=versions)

        # Save results to files
        client.save_all(outdir, force=args.force, fmt=args.fmt)

        # Cleanup, unless disabled
        if not args.no_cleanup:
            client.cleanup(force=True)
예제 #9
0
def generate_graph(template, data, outdir, force):
    """Render an html template with data into <outdir>/index.html.

    Known data attributes are:

      - datasets: a list of dataset, each having color, name, and values
      - title: the title for the html page

    The template and data can be matched for each metric. An existing
    index.html is only replaced when force is true; otherwise this is
    reported through logger.exit. The output directory is created if it
    does not exist yet.
    """
    filename = os.path.join(outdir, "index.html")
    if not force and os.path.exists(filename):
        logger.exit("%s exists, use --force to overwrite." % filename)

    source = "".join(read_file(template))
    html = Template(source).render(**data)

    if not os.path.exists(outdir):
        mkdir_p(outdir)
    write_file(filename, html)
    logger.info("Output written to %s" % filename)
예제 #10
0
def main(args, extra):
    """Run the analysis described by the caliper.yaml given with --config.

    Arguments:
     - args: parsed arguments; this function reads config, no_progress,
       nprocs, force and cleanup.
     - extra: not used by this function.
    """
    # The config file must be given and must exist
    config = args.config
    if not (config and os.path.exists(config)):
        logger.exit(
            "You must provide an existing caliper.yaml config with --config.")

    analyzer = CaliperAnalyzer(config).get_analyzer()

    # parallel is always off here: per the original note, the serial
    # argument was removed because analyze doesn't run well building
    # containers
    options = {
        "show_progress": not args.no_progress,
        "nproc": args.nprocs,
        "force": args.force,
        "parallel": False,
        "cleanup": args.cleanup,
    }
    analyzer.run_analysis(**options)
예제 #11
0
파일: view.py 프로젝트: atrisovic/caliper
def main(args, extra):
    """Plot the results file given as input for a single metric.

    Arguments:
     - args: parsed arguments; this function reads input, metric, outdir,
       force and title.
     - extra: not used by this function.

    If no metric is given, it is taken from the start of the input file
    name, up to the first "-".
    """
    # Used to check that the metric is known, and to load it
    client = MetricsExtractor(quiet=True)

    # An input is required!
    if not args.input:
        logger.exit("An input results file is required.")

    # If the metric is not provided on the command line, needs to be in filename
    metric = args.metric or os.path.basename(args.input).split("-")[0]

    if not metric:
        logger.exit(
            "You must provide a --metric, not derivable from filename.")
    if metric not in client.metrics:
        logger.exit("%s is not a known metric." % metric)

    # Prepare top level output directory. An earlier, separate assignment
    # that mapped "." to the working directory was always overwritten by
    # this line before being used, so it has been removed.
    outdir = args.outdir or os.getcwd()
    metric = client.get_metric(metric)
    metric.plot_results(args.input, outdir, force=args.force, title=args.title)
예제 #12
0
    def _load_config(self, config_file):
        """Read a caliper.yaml file and populate the analysis settings.

        Only the "analysis" section of the file is used. The fields
        "packagemanager" and "dependency" must be present and non-empty,
        and the Dockerfile (the "dockerfile" field, default "Dockerfile",
        relative to the config directory) must exist. Problems are reported
        through logger.exit.

        The output directory <config dir>/.caliper and its data
        subdirectory are created when missing.
        """
        if not os.path.exists(config_file):
            logger.exit("%s does not exist." % config_file)

        self.config_file = config_file
        self.config_dir = os.path.abspath(os.path.dirname(config_file))
        self.config = read_yaml(config_file).get("analysis", {})
        self.outdir = os.path.join(self.config_dir, ".caliper")
        self.data_dir = os.path.join(self.outdir, "data")

        # Required fields have to be present, and set to something
        for key in ("packagemanager", "dependency"):
            if not (key in self.config and self.config.get(key)):
                logger.exit(
                    "%s is a required field in the caliper.yaml config under the analysis key."
                    % key
                )

        # The Dockerfile is looked up relative to the config file
        dockerfile_name = self.config.get("dockerfile", "Dockerfile")
        self.dockerfile = os.path.join(self.config_dir, dockerfile_name)
        if not os.path.exists(self.dockerfile):
            logger.exit("The Dockerfile does not exist.")

        # Dependency name and any additional args
        self.dependency = self.config.get("dependency")
        self.args = self.config.get("args", {})

        # Optional filters for python and library versions
        self.python_versions = self.config.get("python_versions", [])
        self.test_versions = self.config.get("versions", [])

        for dirname in (self.outdir, self.data_dir):
            if not os.path.exists(dirname):
                os.makedirs(dirname)
예제 #13
0
def analysis_task(**kwargs):
    """A shared analysis task for the serial or parallel workers.

    Writes the given Dockerfile content to a temporary file, builds a
    container from it, records the output of pip freeze, runs each test
    script in the container, and writes everything to a json file.

    Required keyword arguments (none may be None):
     - name: used in the temporary Dockerfile name and as the container tag
     - outdir: working directory for the docker build
     - dependency: used in the container name
     - outfile: path of the json result file
     - dockerfile: the Dockerfile content to write and build
     - exists: if false, a failed build is recorded without building

    Optional keyword arguments:
     - tests: newline separated test scripts to run with python
     - force: redo the task even if outfile already exists
     - cleanup: also run "docker system prune --all --force" at the end

    Returns None; the results are written to outfile.
    """
    # Ensure all arguments are provided. An identity check is used so that
    # values with a custom __eq__ cannot be mistaken for None.
    required = ("name", "outdir", "dependency", "outfile", "dockerfile",
                "exists")
    for key in required:
        if kwargs.get(key) is None:
            logger.exit("%s is missing or undefined for analysis task." % key)

    dockerfile = kwargs.get("dockerfile")
    outfile = kwargs.get("outfile")
    cleanup = kwargs.get("cleanup", False)
    dependency = kwargs.get("dependency")
    force = kwargs.get("force", False)
    exists = kwargs.get("exists")
    name = kwargs.get("name")
    outdir = kwargs.get("outdir")
    result = {"inputs": kwargs}
    tests = kwargs.get("tests")
    tests = [] if not tests else tests.split("\n")
    worker_id = multiprocessing.current_process().name

    # An existing output file is only redone when force is true
    if os.path.exists(outfile) and not force:
        return

    # If it doesn't exist, we wouldn't be able to build it, cut out early
    if not exists:
        result["build_retval"] = 1
        write_json(result, outfile)
        return

    # Build temporary Dockerfile
    dockerfile_name = "Dockerfile.caliper.%s" % name
    dockerfile_fullpath = os.path.join(tempfile.gettempdir(), dockerfile_name)

    # Write and build temporary Dockerfile, and build the container
    write_file(dockerfile_fullpath, dockerfile)
    container_name = "%s-container:%s" % (dependency, name)
    sys.stdout.write("[%s] 0 of %s - building container %s\n" %
                     (worker_id, len(tests), container_name))
    runner = CommandRunner()
    try:
        runner.run_command(
            [
                "docker",
                "build",
                "-f",
                dockerfile_fullpath,
                "-t",
                container_name,
                ".",
            ],
            cwd=outdir,
        )
    finally:
        # Clean up Dockerfile, also when the build call raises
        if os.path.exists(dockerfile_fullpath):
            os.remove(dockerfile_fullpath)

    # Keep a result for each script
    result["tests"] = {"build": {"retval": runner.retval}}
    if runner.retval != 0:
        result["tests"]["build"]["error"] = runner.error
        write_json(result, outfile)
        return

    # Get packages installed for each container. --rm matches the test
    # runs below, so this run does not leave a stopped container behind.
    runner.run_command(
        ["docker", "run", "--rm", container_name, "pip", "freeze"])
    result["requirements.txt"] = runner.output

    # Results for each test script, keyed by script
    test_results = {}

    # Run each test
    for i, script in enumerate(tests):
        start = time.time()
        sys.stdout.write("[%s] %s of %s - %s" %
                         (worker_id, i + 1, len(tests), script))
        runner.run_command(
            ["docker", "run", "--rm", container_name, "python", script])
        end = time.time()
        test_results[script] = {
            "error": runner.error,
            "output": runner.output,
            "retval": runner.retval,
            "seconds": round(end - start, 2),
        }
        sys.stdout.write(" total time: %s seconds \n" %
                         test_results[script]["seconds"])
        sys.stdout.flush()

    # Update results with all tests
    result["tests"].update(test_results)

    # Save the result to file, then remove the image and dangling layers
    write_json(result, outfile)
    runner.run_command(["docker", "rmi", container_name, "--force"])
    runner.run_command(["docker", "images", "-f", "dangling=true", "-q"])
    for layer in runner.output:
        runner.run_command(["docker", "rmi", layer.strip("\n"), "--force"])
    if cleanup:
        runner.run_command(["docker", "system", "prune", "--all", "--force"])
예제 #14
0
    def run(self):
        """Run every task in self.tasks through a multiprocessing pool.

        self.tasks maps a key to a (function, params) tuple; the tasks
        should be added with add_task. Each task is submitted
        asynchronously, then the results are collected.

        Returns a dict mapping each task key to its result, or None when
        there are no tasks.
        """
        # Keep track of some progress for the user
        total = len(self.tasks)

        # if we don't have tasks, don't run
        if not self.tasks:
            return

        # finished uses the same keys as the tasks, to look results up
        finished = {}
        pending = []

        # Defined before the try so that the handlers can always test it,
        # also when creating the pool itself fails
        pool = None

        try:
            pool = multiprocessing.Pool(self.workers, init_worker)

            self.start()
            logger.info("Preparing %s tasks..." % total)
            for progress, (key, task) in enumerate(self.tasks.items(),
                                                   start=1):
                func, params = task
                if self.show_progress:
                    prefix = "[%s/%s]" % (progress, total)
                    logger.show_progress(progress,
                                         total,
                                         length=35,
                                         prefix=prefix)
                result = pool.apply_async(multi_wrapper,
                                          multi_package(func, [params]))

                # Store the key with the result
                pending.append((key, result))

            progress = 1
            logger.info("Waiting for results...")
            while pending:
                key, result = pending.pop()
                if self.show_progress:
                    prefix = "[%s/%s]" % (progress, total)
                    logger.show_progress(progress,
                                         total,
                                         length=35,
                                         prefix=prefix)
                result.wait()
                progress += 1
                finished[key] = result.get()

            self.end()
            pool.close()
            pool.join()

        except (KeyboardInterrupt, SystemExit):
            logger.error("Keyboard interrupt detected, terminating workers!")
            if pool is not None:
                pool.terminate()
            sys.exit(1)

        except Exception as exc:
            # Stop the workers and show what went wrong, instead of hiding
            # the cause behind a generic message
            if pool is not None:
                pool.terminate()
            logger.error("%s: %s" % (type(exc).__name__, exc))
            logger.exit("Error running task.")

        return finished
예제 #15
0
    def run_analysis(
        self,
        release_filter=None,
        nproc=None,
        parallel=False,
        show_progress=True,
        func=None,
        force=False,
        cleanup=False,
    ):
        """Once the config is loaded, prepare and run one task per build.

        A task is created for each combination of library version and
        Python version that passes the configured filters.

        Arguments:
         - release_filter: regular expression used to select releases;
           defaults to linux x86_64 wheels and .tar.gz sources
         - nproc: passed to the parallel runner
         - parallel: run with _run_parallel instead of _run_serial
         - show_progress: passed to the parallel runner
         - func: the task function, defaults to analysis_task
         - force: passed to each task
         - cleanup: passed to each task

        Returns the result of the serial or parallel runner.
        """
        # The release filter is a regular expression we use to find the correct
        # platform / architecture. We select linux wheels and source
        release_filter = release_filter or "(.*manylinux.*x86_64.*|[.]tar[.]gz)"
        func = func or analysis_task

        # prepare a command runner, check that docker is installed
        runner = CommandRunner()
        runner.run_command(["which", "docker"])
        if runner.retval != 0:
            logger.exit("Docker must be installed to build containers.")

        # Prepare arguments for runner, whether it's serial or parallel
        manager = PypiManager(self.dependency)
        all_releases = manager.filter_releases(release_filter)
        python_versions = manager.get_python_versions()
        python_version_regex = "(%s)" % "|".join(self.python_versions)

        # Read in the template, populate with each deps version
        template = Template(read_file(self.dockerfile, readlines=False))

        # The tests are the same for every task. The field is optional in
        # the config, so a missing value means no tests.
        tests = "\n".join(self.config.get("tests") or [])

        # Prepare arguments to build and test a container for each
        tasks = {}

        # Loop over versions of the library, and Python versions
        for version, releases in all_releases.items():

            # Check if the user has defined a set of versions
            if self.test_versions and version not in self.test_versions:
                continue

            # Create a lookup based on Python version
            lookup = {x["python_version"]: x for x in releases}

            for python_version in python_versions:

                # If the user has requested a subset of Python versions
                if self.python_versions and not re.search(
                    python_version_regex, python_version, re.IGNORECASE
                ):
                    continue

                name = "%s-%s-%s-python-%s" % (
                    self.name,
                    self.dependency,
                    version,
                    python_version,
                )
                outfile = os.path.join(self.data_dir, "%s.json" % name)
                spec = lookup.get(python_version, {})

                # If the Python version is not in the lookup we cannot do a build
                exists = python_version in lookup

                # A tag such as cp36 or cp310 is <major><minor>: the first
                # digit is the major version and the rest is the minor one
                # (cp310 is 3.10, not 3.1.0). Any other form keeps the
                # previous character-by-character join.
                tag = python_version.lstrip("cp")
                if tag.isdigit() and len(tag) > 1:
                    dotted = "%s.%s" % (tag[0], tag[1:])
                else:
                    dotted = ".".join(tag)
                container_base = "python:%s" % dotted

                # It's easier to pass the rendered template than all arguments for it
                result = template.render(
                    base=container_base,
                    filename=spec.get("url", ""),
                    basename=spec.get("filename", ""),
                    **self.args
                )
                params = {
                    "dependency": self.dependency,
                    "outfile": outfile,
                    "dockerfile": result,
                    "force": force,
                    "exists": exists,
                    "name": name,
                    "tests": tests,
                    "cleanup": cleanup,
                    "outdir": self.config_dir,
                }
                tasks[name] = (func, params)

        if parallel:
            return self._run_parallel(tasks, nproc, show_progress)
        return self._run_serial(tasks)
예제 #16
0
__author__ = "Vanessa Sochat"
__copyright__ = "Copyright 2020-2021, Vanessa Sochat"
__license__ = "MPL 2.0"

from caliper.utils.file import mkdir_p, read_file, write_file
from caliper.logger import logger
import os

# Absolute path of the directory that contains this module
here = os.path.abspath(os.path.dirname(__file__))

# jinja2 is needed to render the graph templates. If it cannot be imported,
# this is reported through logger.exit when the module is imported.
try:
    from jinja2 import Template
except ImportError:
    logger.exit("You must install jinja2 to use graphs.")


def generate_graph(template, data, outdir, force):
    """given an html template, data to populate it, and an output directory,
    generate a plot. Known data attributes are:

      - datasets: a list of dataset, each having color, name, and values
      - title: the title for the html page

     Of course the template and data can be matched for each metric.
    """
    filename = os.path.join(outdir, "index.html")
    if os.path.exists(filename) and not force:
        logger.exit("%s exists, use --force to overwrite." % filename)
    template = Template("".join(read_file(template)))
    result = template.render(**data)
    if not os.path.exists(outdir):