Example #1
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for the python package type into the engine "raw" document format.
    """
    if "python-package-cataloger" not in artifact["foundBy"]:
        # engine only includes python findings for egg and wheel installations (with rich metadata)
        return

    site_pkg_root = artifact["metadata"]["sitePackagesRootPath"]
    name = artifact["name"]

    # anchore-engine always uses the name; however, the name may not be a
    # top-level package. Instead, default to the first top-level package unless
    # the name is listed among the top-level packages explicitly defined in the
    # metadata. Note that the top-level package list is optional!
    pkg_key_names = dig(artifact, "metadata", "topLevelPackages",
                        force_default=[])
    pkg_key_name = None
    for key_name in pkg_key_names:
        if name in key_name:
            # the package name is listed among the top-level packages; use it
            pkg_key_name = name
            break
        if pkg_key_name is None:
            # otherwise fall back to the first top-level package
            pkg_key_name = key_name

    if not pkg_key_name:
        pkg_key_name = name

    pkg_key = os.path.join(site_pkg_root, pkg_key_name)
    origin = dig(artifact, "metadata", "author", force_default="")
    email = dig(artifact, "metadata", "authorEmail", default=None)
    if email:
        origin += " <%s>" % email

    files = []
    for file in dig(artifact, "metadata", "files", force_default=[]):
        files.append(os.path.join(site_pkg_root, file["path"]))

    # craft the artifact document
    pkg_value = {
        "name": name,
        "version": artifact["version"],
        "latest": artifact["version"],
        "files": files,
        "origin": origin,
        "license": dig(artifact, "metadata", "license", force_default=""),
        "location": site_pkg_root,
        "type": "python",
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, pkg_key)
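
The dig helper used throughout these handlers is a module-level utility that is not shown on this page. Below is a minimal, hypothetical sketch inferred purely from the call sites above (a default keyword for missing keys, and force_default that also replaces falsy values); the real anchore-engine implementation may differ.

def dig(target, *keys, **kwargs):
    # Hypothetical reimplementation inferred from usage in these examples;
    # not the actual anchore-engine helper.
    current = target
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # missing key: prefer force_default, then default
            if "force_default" in kwargs:
                return kwargs["force_default"]
            return kwargs.get("default")

    if not current and "force_default" in kwargs:
        # assumption: force_default also replaces falsy values such as "" or None
        return kwargs["force_default"]
    return current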
Example #2
def convert_syft_to_engine(all_results):
    """
    Do the conversion from syft format to engine format

    :param all_results:
    :return:
    """

    # transform output into analyzer-module/service "raw" analyzer json document
    nested_dict = lambda: collections.defaultdict(nested_dict)
    findings = nested_dict()

    # This is the only use case for consuming the top-level results from syft,
    # capturing the information needed for BusyBox. No artifacts are expected
    # here, and keeping this outside of the artifacts loop ensures it only
    # runs once.
    distro = all_results.get("distro")
    if distro and distro.get("name", "").lower() == "busybox":
        findings["package_list"]["pkgs.all"]["base"]["BusyBox"] = distro[
            "version"]
    elif not distro or not distro.get("name"):
        findings["package_list"]["pkgs.all"]["base"]["Unknown"] = "0"

    # take a sub-set of the syft findings and invoke the handler function to
    # craft the artifact document and inject into the "raw" analyzer json
    # document
    for artifact in filter_artifacts(
            all_results["artifacts"],
            dig(all_results, "artifactRelationships", force_default=[]),
    ):
        handler = modules_by_artifact_type[artifact["type"]]
        handler.translate_and_save_entry(findings, artifact)

    return defaultdict_to_dict(findings)
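
A short, hypothetical driver showing how convert_syft_to_engine might be wired up: generate syft JSON for an image (syft supports -o json), load it, and hand it to the converter. The file name and surrounding wiring are illustrative only, not taken from the source.

import json

# Illustrative only: assumes syft output was saved beforehand, e.g. with
# `syft <image> -o json > syft-output.json`.
with open("syft-output.json") as handle:
    all_results = json.load(handle)

findings = convert_syft_to_engine(all_results)

# The converted document is keyed by analyzer buckets such as "package_list".
print(list(findings.get("package_list", {}).keys()))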
Example #3
def _all_packages_plus_source(findings, artifact):
    name = artifact["name"]
    version = artifact["version"]

    origin_package = dig(artifact, "metadata", "originPackage")

    findings["package_list"]["pkgs_plus_source.all"]["base"][name] = version
    if origin_package:
        findings["package_list"]["pkgs_plus_source.all"]["base"][
            origin_package] = version
Example #4
def _all_package_files(findings, artifact):
    for file in dig(artifact, "metadata", "files", force_default=[]):
        original_path = file.get("path")
        if not original_path.startswith("/"):
            # the 'alpine-baselayout' package lists its files relative to root,
            # but anchore-engine expects absolute paths, so prefix with "/"
            original_path = "/" + original_path

        # anchore-engine considers all parent paths to also be registered apk
        # file paths (except root), so only the file path itself is recorded
        findings["package_list"]["pkgfiles.all"]["base"][
            original_path] = "APKFILE"
Example #5
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for the gem package type into the
    engine "raw" document format.
    """
    pkg_key = artifact["locations"][0]["path"]
    name = artifact["name"]
    versions = [artifact["version"]]

    # craft the artifact document
    pkg_value = {
        "name": name,
        "versions": versions,
        "latest": dig(artifact, "version", force_default=""),
        "sourcepkg": dig(artifact, "metadata", "homepage", force_default=""),
        "files": dig(artifact, "metadata", "files", force_default=[]),
        "origins": dig(artifact, "metadata", "authors", force_default=[]),
        "lics": dig(artifact, "metadata", "licenses", force_default=[]),
        "cpes": artifact.get("cpes", []),
    }

    save_entry(findings, pkg_value, pkg_key)
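
save_entry is the other shared helper these handlers rely on. Judging only from its call sites, save_entry(findings, pkg_value, pkg_key) stores the crafted package document under the given key inside the nested findings structure. The sketch below is a guess for illustration; the bucket names it uses are assumptions, not necessarily the real anchore-engine document layout.

def save_entry(findings, engine_entry, pkg_key=None):
    # Hypothetical sketch inferred from usage; the bucket names below
    # ("package_list" / "pkgs.allinfo" / "base") are assumptions and may not
    # match the real anchore-engine analyzer document.
    if not pkg_key:
        pkg_key = engine_entry.get("name", "")
    findings["package_list"]["pkgs.allinfo"]["base"][pkg_key] = engine_entry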
Example #6
    def filter_fn(artifact):
        # syft may do more work than what is supported in engine; ensure we
        # only include artifacts of the selected package types.
        if artifact["type"] not in modules_by_artifact_type:
            return False

        # some packages are owned by other packages (e.g. a python package that
        # was installed from an RPM instead of with pip); filter out any
        # packages that are not "root" packages.
        if filter_relationships(relationships,
                                child=dig(artifact, "id"),
                                type="ownership-by-file-overlap"):
            return False

        return True
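
filter_relationships is also not shown on this page. From the keyword arguments used above, it appears to select the relationship records whose fields match the given values (here the child artifact id and the relationship type). A minimal, hypothetical version for illustration; the real helper may differ.

def filter_relationships(relationships, **kwargs):
    # Hypothetical sketch inferred from usage: keep relationship records whose
    # fields match every keyword filter, e.g. child=<artifact id> and
    # type="ownership-by-file-overlap".
    def matches(relationship):
        return all(relationship.get(field) == value
                   for field, value in kwargs.items())

    return [r for r in relationships if matches(r)]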
Example #7
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for npm package type into the engine "raw" document format.
    """
    pkg_key = artifact["locations"][0]["path"]
    name = artifact["name"]
    homepage = dig(artifact, "metadata", "homepage", force_default="")
    author = dig(artifact, "metadata", "author", force_default="")
    authors = dig(artifact, "metadata", "authors", force_default=[])
    origins = [] if not author else [author]
    origins.extend(authors)

    pkg_value = {
        "name": name,
        "versions": [artifact["version"]],
        "latest": artifact["version"],
        "sourcepkg": dig(artifact, "metadata", "url", force_default=homepage),
        "origins": origins,
        "lics": dig(artifact, "metadata", "licenses", force_default=[]),
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, pkg_key)
Example #8
def _all_package_info(findings, artifact):
    name = artifact["name"]
    version = artifact["version"]
    release = dig(artifact, "metadata", "release")

    if release:
        version = artifact["version"] + "-" + release

    maintainer = dig(artifact, "metadata", "maintainer")
    if maintainer:
        maintainer += " (maintainer)"

    size = dig(artifact, "metadata", "installedSize")
    if size:
        # convert KB to Bytes
        size = size * 1000
    else:
        size = "N/A"

    source = dig(artifact, "metadata", "source")
    source_version = dig(artifact, "metadata", "sourceVersion")

    # Normalize this for downstream consumption. Eventually we want to keep
    # source and source version split out, but for now they need to be joined.
    if source and source_version:
        source = source + "-" + source_version
    elif source:
        source = source + "-" + version
    else:
        source = "N/A"

    license = dig(artifact, "licenses")
    if license:
        license = " ".join(license)
    else:
        license = "Unknown"

    pkg_value = {
        "version": version,
        "sourcepkg": source,
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": maintainer or "N/A",
        "release": "N/A",
        "size": str(size),
        "license": license,
        "type": "dpkg",
        "cpes": artifact.get("cpes", []),
    }

    save_entry(findings, pkg_value, name)
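
For illustration, a hypothetical dpkg artifact and the transformations the handler above applies to it. The values are made up; the field names are the ones the handler reads.

# Hypothetical input, shaped only after the fields read by the handler above.
artifact = {
    "name": "curl",
    "version": "7.74.0",
    "cpes": [],
    "metadata": {
        "release": "1.3",               # version becomes "7.74.0-1.3"
        "maintainer": "Alice",          # origin becomes "Alice (maintainer)"
        "installedSize": 425,           # reported as "425000" (KB * 1000)
        "source": "curl",
        "sourceVersion": "7.74.0-1.3",  # sourcepkg becomes "curl-7.74.0-1.3"
        "architecture": "amd64",
    },
}

# _all_package_info(findings, artifact) would then record, under the key
# "curl", a pkg_value with version "7.74.0-1.3", sourcepkg "curl-7.74.0-1.3",
# size "425000", origin "Alice (maintainer)", and license "Unknown", since no
# top-level "licenses" field is present in this sample.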
Example #9
def _all_package_info(findings, artifact):
    name = artifact["name"]
    version = artifact["version"]
    release = dig(artifact, "metadata", "release")

    if release:
        version = artifact["version"] + "-" + release

    maintainer = dig(artifact, "metadata", "maintainer")
    if maintainer:
        maintainer += " (maintainer)"

    size = dig(artifact, "metadata", "installedSize")
    if size:
        # convert KB to Bytes
        size = size * 1000
    else:
        size = "N/A"

    source = dig(artifact, "metadata", "source")
    if source:
        source = source.split(" ")[0] + "-" + version
    else:
        source = "N/A"

    license = dig(artifact, "licenses") or dig(artifact, "license")
    if license:
        license = " ".join(license)
    else:
        license = "Unknown"

    pkg_value = {
        "version": version,
        "sourcepkg": source,
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": maintainer or "N/A",
        "release": "N/A",
        "size": str(size),
        "license": license,
        "type": "dpkg",
        "cpes": artifact.get("cpes", []),
    }

    save_entry(findings, pkg_value, name)
Example #10
def _all_package_info(findings, artifact):
    name = artifact["name"]
    version = artifact["version"]

    release = "N/A"
    version_pattern = re.match(r"(\S*)-(\S*)", version)
    if version_pattern:
        version = version_pattern.group(1) or version
        release = version_pattern.group(2) or "N/A"

    pkg_value = {
        "name": name,
        "version": version,
        "sourcepkg": dig(artifact, "metadata", "originPackage",
                         force_default="N/A"),
        "arch": dig(artifact, "metadata", "architecture", force_default="N/A"),
        "origin": dig(artifact, "metadata", "maintainer", force_default="N/A"),
        "release": release,
        "size": str(dig(artifact, "metadata", "installedSize",
                        force_default="N/A")),
        "license": dig(artifact, "metadata", "license", force_default="N/A"),
        "type": "APKG",
        "files": [
            f.get("path")
            for f in dig(artifact, "metadata", "files", force_default=[])
        ],
        "cpes": artifact.get("cpes", []),
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, name)
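
The version regex above is greedy, so an apk-style version string splits at its last hyphen. A quick illustrative check (the sample version string is made up):

import re

version = "1.35.0-r17"  # hypothetical apk-style version string
match = re.match(r"(\S*)-(\S*)", version)
if match:
    # the greedy first group captures everything up to the last hyphen
    print(match.group(1))  # 1.35.0
    print(match.group(2))  # r17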
Example #11
def translate_and_save_entry(findings, artifact):
    """
    Handler function to map syft results for java-archive and jenkins-plugin types into the engine "raw" document format.
    """
    pkg_key = dig(artifact, "metadata", "virtualPath", default="N/A")

    virtualElements = pkg_key.split(":")
    if "." in virtualElements[-1]:
        # there may be an extension in the virtual path; use it
        java_ext = virtualElements[-1].split(".")[-1]
    else:
        # the last field is probably a package name; use the second-to-last
        # virtual path element and extract the
        java_ext = virtualElements[-2].split(".")[-1]

    # per the manifest specification https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest
    # these fields SHOULD be in the main section, however, there are multiple java packages found
    # where this information is thrown into named subsections.

    # Today anchore-engine reads key-value pairs in all sections into one large
    # map; this behavior is replicated here.

    values = {}

    main_section = dig(artifact, "metadata", "manifest", "main", default={})
    named_sections = dig(artifact, "metadata", "manifest", "namedSections",
                         default={})
    for name, section in ([("main", main_section)] +
                          list(named_sections.items())):
        for field, value in section.items():
            values[field] = value

    # find the origin
    group_id = dig(artifact, "metadata", "pomProperties", "groupId")
    origin = values.get("Specification-Vendor")
    if not origin:
        origin = values.get("Implementation-Vendor")

    # use pom properties over manifest info (if available)
    if group_id:
        origin = group_id

    # synthesize a part of the pom.properties
    pom_artifact_id = dig(artifact, "metadata", "pomProperties", "artifactId")
    pom_version = dig(artifact, "metadata", "pomProperties", "version")

    pomProperties = """
groupId={}
artifactId={}
version={}
""".format(group_id, pom_artifact_id, pom_version)

    pkg_value = {
        "name": artifact["name"],
        "specification-version": values.get("Specification-Version", "N/A"),
        "implementation-version": values.get("Implementation-Version", "N/A"),
        "maven-version": dig(artifact, "metadata", "pomProperties", "version",
                             default="N/A"),
        "origin": origin or "N/A",
        "location": pkg_key,  # this should be related to full path
        "type": "java-" + java_ext,
        "cpes": artifact.get("cpes", []),
        "metadata": {
            "pom.properties": pomProperties
        },
    }

    # inject the artifact document into the "raw" analyzer document
    save_entry(findings, pkg_value, pkg_key)
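
To illustrate the manifest flattening and the synthesized pom.properties text, here is a hypothetical java-archive artifact. The field names are the ones the handler reads; the values are made up.

# Hypothetical input, shaped only after the fields read by the handler above.
artifact = {
    "name": "log4j-core",
    "cpes": [],
    "metadata": {
        "virtualPath": "/app/app.jar:log4j-core",
        "manifest": {
            "main": {"Implementation-Version": "2.14.1"},
            "namedSections": {
                "some-section": {"Specification-Vendor": "Apache"},
            },
        },
        "pomProperties": {
            "groupId": "org.apache.logging.log4j",
            "artifactId": "log4j-core",
            "version": "2.14.1",
        },
    },
}

# With this input, `values` combines both manifest sections into one map,
# origin resolves to the groupId (pom properties win over the manifest),
# java_ext is "jar" (the last ":" element has no ".", so the extension comes
# from "app.jar"), and pom.properties is rendered with the groupId,
# artifactId, and version lines from the format string above.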
Example #12
def _all_package_files(findings, artifact):
    for file in dig(artifact, "metadata", "files", force_default=[]):
        pkgfile = file.get("path")
        findings["package_list"]["pkgfiles.all"]["base"][pkgfile] = "RPMFILE"