Example #1
    def get_package_metadata(self, name=None):
        """Given a package name, retrieve it's metadata from pypi"""
        name = name or self.package_name
        if not name:
            raise ValueError("A package name is required.")

        # At some point we might need to add pagination
        url = "%s/repos/%s/releases?per_page=100" % (self.baseurl, name)
        self.metadata = do_request(url, headers=self._get_headers())

        # Parse metadata into simplified version of spack package schema
        for release in self.metadata:

            self._specs.append({
                "name": name,
                "version": release["tag_name"],
                "source": {
                    "filename": release["tarball_url"],
                    "type": "targz",
                },
                "hash": None,
            })

        # Specs must be sorted by version, otherwise later steps won't work
        self._specs = self.sort_specs(self._specs, by="version")
        logger.info("Found %s versions for %s" % (len(self._specs), name))
        return self._specs
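The sort_specs helper used above is not shown. A minimal sketch of what it might do, assuming tags are mostly PEP 440 style versions and that ascending order is wanted (the fallback for unparseable tags is also an assumption):

from packaging.version import InvalidVersion, Version


def sort_specs(specs, by="version"):
    """Sort a list of spec dicts by a version-like key, oldest first."""

    def key(spec):
        value = spec.get(by, "")
        try:
            # Prefer a real version comparison when the tag parses cleanly
            return (0, Version(value.lstrip("v")))
        except InvalidVersion:
            # Fall back to a plain string comparison for unusual tags
            return (1, value)

    return sorted(specs, key=key)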
Example #2
    def save_all(self, outdir, force=False, fmt=None):
        """Save data as json or zip exports using an outdir root. If fmt is None,
        we use the extractor default (typically single-json except for metrics
        that warrant larger / more extraction).
        """
        if not self.manager or not self._extractors:
            logger.exit(
                "You must add a manager and do an extract() before save.")

        if fmt and fmt not in self.manager.export_formats:
            logger.exit("Export format %s is not recognized. Choose %s." %
                        (fmt, ", ".join(self.manager.export_formats)))
        package_dir = os.path.join(outdir, self.manager.name, self.manager.uri)
        logger.info("Results will be written to %s" % package_dir)

        for extractor in self._extractors.values():

            # Each metric can define a default format
            fmt_ = fmt or extractor.extractor

            # Do save based on selected type
            if fmt_ == "json-single":
                extractor.save_json_single(package_dir, force=force)
            elif fmt_ == "zip":
                extractor.save_zip(package_dir, force=force)
            else:
                extractor.save_json(package_dir, force=force)
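For reference, a minimal sketch of what a per-extractor save method could look like, writing one metric's results as json under the package directory; the function name, the results argument, and the overwrite behavior are illustrative assumptions, not the project's actual API:

import json
import os


def save_json(results, package_dir, name, force=False):
    """Write one metric's results to <package_dir>/<name>.json."""
    os.makedirs(package_dir, exist_ok=True)
    outfile = os.path.join(package_dir, "%s.json" % name)
    # Respect an existing result unless the caller asks to overwrite
    if os.path.exists(outfile) and not force:
        raise SystemExit("%s exists, use force to overwrite." % outfile)
    with open(outfile, "w") as fd:
        json.dump(results, fd, indent=4)
    return outfile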
Example #3
    def _load_metric_repo(self, metric, repository, subfolder, branch,
                          extension):
        """helper function to load a metric from a repository."""
        # If we have a subfolder, add // around it
        if subfolder:
            subfolder = "%s/" % subfolder.strip("/")
        manager = self.manager.replace(":", "/")

        # Load the index for the metric, must exist for all output types
        url = "https://raw.githubusercontent.com/%s/%s/%s%s/%s/index.json" % (
            repository,
            branch,
            subfolder,
            manager,
            metric,
        )

        logger.info("Downloading %s" % url)
        response = requests.get(url)
        if response.status_code == 200:
            index = response.json()
            data = index.get("data", {})

            # Parse a metric repository, meaning reading the index.json
            return self._read_metric_repo(url, index, data, metric, extension)
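To make the URL construction above concrete, here is a small standalone sketch that builds the same raw.githubusercontent.com path and fetches the index; the function name and default branch are assumptions for illustration:

import requests


def fetch_metric_index(repository, manager, metric, branch="main", subfolder=""):
    """Fetch the index.json for a metric in a results repository."""
    if subfolder:
        subfolder = "%s/" % subfolder.strip("/")
    url = "https://raw.githubusercontent.com/%s/%s/%s%s/%s/index.json" % (
        repository, branch, subfolder, manager.replace(":", "/"), metric)
    response = requests.get(url)
    # A missing index likely means the metric was never extracted for this manager
    if response.status_code != 200:
        return None
    return response.json()

As a usage example with made-up names, fetch_metric_index("org/metrics-repo", "pypi:requests", "changedlines") would look for pypi/requests/changedlines/index.json in that repository.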
Example #4
    def get_package_metadata(self, name=None, arch=None, python_version=None):
        """Given a package name, retrieve it's metadata from pypi. Given an arch
        regex and python version, we look for a particular architecture. Otherwise
        the choices are a bit random.
        """
        # Note that without specifying an arch and python version, the
        # architecture returned can be fairly random.

        # Parse metadata into simplified version of spack package schema
        for version, releases in self.releases.items():

            # Find an appropriate linux/unix flavor release to extract
            release = self.find_release(releases, arch, python_version)

            # Some releases can be empty, skip
            if not releases or not release:
                continue

            # Release type drives the extraction logic
            release_type = "wheel" if release["url"].endswith(
                "whl") else "targz"
            self._specs.append({
                "name": name,
                "version": version,
                "source": {
                    "filename": release["url"],
                    "type": release_type,
                },
                "hash": release["digests"]["sha256"],
            })

        # PyPI already appears to return releases sorted by version, so no re-sort
        logger.info("Found %s versions for %s" %
                    (len(self._specs), name or self.package_name))
        return self._specs
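The find_release helper above is not included. A rough sketch of the selection it might perform, assuming each release entry is a dict from the PyPI JSON API with url and python_version fields (the matching rules here are assumptions):

import re


def find_release(releases, arch=None, python_version=None):
    """Pick a release matching an optional arch regex and python version."""
    chosen = None
    for release in releases:
        # Skip releases that do not mention the requested python version
        if python_version and python_version not in release.get("python_version", ""):
            continue
        # Skip releases whose file url does not match the arch regex
        if arch and not re.search(arch, release.get("url", "")):
            continue
        # Keep the last matching release found
        chosen = release
    return chosen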
Example #5
    def prepare_repository(self, versions=None):
        """Since most source code archives won't include the git history,
        we would want to create a root directly with a new git installation,
        and then create tagged commits that correpond to each version. We
        can then use this git repository to derive metrics of change.
        """
        versions = versions or []
        if not self.manager:
            logger.exit("A manager is required to prepare a repository.")

        # Create temporary git directory
        self.tmpdir = tempfile.mkdtemp(prefix="%s-" %
                                       self.manager.uri.replace("/", "-"))
        self.git = GitManager(self.tmpdir, quiet=self.quiet)

        # Initialize empty repository
        self.git.init()

        # If we have versions, filter down
        self.filter_versions(versions)

        # For each version, download and create git commit and tag
        for i, spec in enumerate(self.manager.specs):

            logger.info("Downloading and tagging %s, %s of %s" %
                        (spec["version"], i + 1, len(self.manager.specs)))
            download_to = os.path.join(
                self.tmpdir, os.path.basename(spec["source"]["filename"]))

            # Extraction type is based on source type
            wget_and_extract(
                url=spec["source"]["filename"],
                download_type=spec["source"]["type"],
                download_to=download_to,
            )

            # git add all content in folder, commit and tag with version
            self.git.add()
            self.git.status()
            self.git.commit(spec["version"])
            self.git.tag(spec["version"])

        logger.info("Repository for %s is created at %s" %
                    (self.manager, self.tmpdir))
        return self.git
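The wget_and_extract helper is also not shown. A simplified sketch under the assumption that sources are either wheels/zips or gzipped tarballs, with error handling and extraction safety checks omitted:

import os
import tarfile
import urllib.request
import zipfile


def wget_and_extract(url, download_type, download_to):
    """Download an archive and extract it into the download directory."""
    urllib.request.urlretrieve(url, download_to)
    dest = os.path.dirname(download_to)
    if download_type in ("wheel", "zip"):
        with zipfile.ZipFile(download_to) as archive:
            archive.extractall(dest)
    else:
        # Default to a gzipped tarball, the "targz" type used above
        with tarfile.open(download_to, "r:gz") as archive:
            archive.extractall(dest)
    # Remove the archive so only extracted content is committed
    os.remove(download_to)
    return dest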
Example #6
def generate_graph(template, data, outdir, force):
    """given an html template, data to populate it, and an output directory,
    generate a plot. Known data attributes are:

      - datasets: a list of dataset, each having color, name, and values
      - title: the title for the html page

     Of course the template and data can be matched for each metric.
    """
    filename = os.path.join(outdir, "index.html")
    if os.path.exists(filename) and not force:
        logger.exit("%s exists, use --force to overwrite." % filename)
    template = Template("".join(read_file(template)))
    result = template.render(**data)
    if not os.path.exists(outdir):
        mkdir_p(outdir)
    write_file(filename, result)
    logger.info("Output written to %s" % filename)
Example #7
    def _run_serial(self, tasks, show_progress=True):
        """Run tasks in serial. The workers save result files, so we don't
        care about the results (would take more memory to try and return the
        same content).
        """
        progress = 1
        total = len(tasks)

        results = {}
        for key, task in tasks.items():
            func, params = task
            prefix = "[%s/%s]" % (progress, total)
            if show_progress:
                logger.info("%s: %s" % (prefix, key))
            else:
                logger.info("Processing task %s" % key)

            results[key] = func(**params)
            progress += 1

        return results
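A task here is a (function, parameters) tuple keyed by a name. A tiny illustration of the expected shape, with a hypothetical function and file names:

def count_lines(filename):
    """Count the lines in a file."""
    with open(filename) as fd:
        return len(fd.readlines())


tasks = {
    "count-setup": (count_lines, {"filename": "setup.py"}),
    "count-readme": (count_lines, {"filename": "README.md"}),
}
# Passing tasks to the method above would return
# {"count-setup": <int>, "count-readme": <int>}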
Example #8
    def run(self):
        """run will send a list of tasks, a tuple with arguments, through a function.
        The tasks should be added with add_task.
        """
        # Keep track of some progress for the user
        total = len(self.tasks)

        # if we don't have tasks, don't run
        if not self.tasks:
            return

        # results will also have the same key to look up
        finished = dict()
        results = []

        try:
            pool = multiprocessing.Pool(self.workers, init_worker)

            self.start()
            progress = 1
            logger.info("Preparing %s tasks..." % total)
            for key, task in self.tasks.items():
                func, params = task
                if self.show_progress:
                    prefix = "[%s/%s]" % (progress, total)
                    logger.show_progress(progress,
                                         total,
                                         length=35,
                                         prefix=prefix)
                result = pool.apply_async(multi_wrapper,
                                          multi_package(func, [params]))

                # Store the key with the result
                results.append((key, result))
                progress += 1

            progress = 1
            logger.info("Waiting for results...")
            while len(results) > 0:
                pair = results.pop()
                key, result = pair
                if self.show_progress:
                    prefix = "[%s/%s]" % (progress, total)
                    logger.show_progress(progress,
                                         total,
                                         length=35,
                                         prefix=prefix)
                result.wait()
                progress += 1
                finished[key] = result.get()

            self.end()
            pool.close()
            pool.join()

        except (KeyboardInterrupt, SystemExit):
            logger.error("Keyboard interrupt detected, terminating workers!")
            pool.terminate()
            sys.exit(1)

        except Exception:
            logger.exit("Error running task.")

        return finished
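The multi_wrapper and multi_package helpers dispatched with apply_async are not shown. A common pattern for this call shape is to bundle the function with its keyword arguments so a single picklable callable runs inside the worker; a sketch under that assumption, not necessarily the project's exact implementation:

def multi_package(func, args):
    """Pair the function with each set of keyword arguments."""
    return [(func, params) for params in args]


def multi_wrapper(job):
    """Unpack a (function, kwargs) pair and run it inside a worker."""
    function, kwargs = job
    return function(**kwargs)

With these sketches, pool.apply_async(multi_wrapper, multi_package(func, [params])) expands to multi_wrapper((func, params)) in the worker, which calls func(**params).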