class PackageFile(BaseModel):
    """
    Describe a single file of a package: its path, its size in bytes and its
    hexadecimal checksums.
    """

    path = String(
        label='Path of this installed file',
        help=(
            'The path of this installed file either relative to a rootfs '
            '(typical for system packages) or a path in this scan (typical '
            'for application packages).'
        ),
        repr=True,
    )

    size = Integer(
        label='file size',
        help='size of the file in bytes',
    )

    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this file in hexadecimal',
    )

    md5 = String(
        label='MD5 checksum',
        help='MD5 checksum for this file in hexadecimal',
    )

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this file in hexadecimal',
    )

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this file in hexadecimal',
    )
Beispiel #2
0
class Party(BaseModel):
    """
    Describe a person, a project or an organization that is related to a
    package.
    """

    type = String(
        validator=choices(PARTY_TYPES),
        label='party type',
        # Empty/None entries of PARTY_TYPES are left out of the help text.
        help='the type of this party: One of: ' + ', '.join(
            party_type for party_type in PARTY_TYPES if party_type
        ),
    )

    role = String(
        label='party role',
        help=(
            'A role for this party. Something such as author, '
            'maintainer, contributor, owner, packager, distributor, '
            'vendor, developer, owner, etc.'
        ),
    )

    name = String(
        label='name',
        help='Name of this party.',
    )

    email = String(
        label='email',
        help='Email for this party.',
    )

    url = String(
        label='url',
        help='URL to a primary web page for this party.',
    )
Beispiel #3
0
class FileReference(ModelMixin):
    """
    Describe a file listed in a manifest or data file: its path, size,
    checksums and arbitrary extra data.
    """
    path = String(
        label='Path of this file.',
        help=(
            'The file or directory POSIX path. The actual root for this path '
            'is specific to a datafile format. For instance it is the rootfs '
            'root for Linux system packages.'
        ),
        repr=True,
    )

    size = Integer(
        label='file size', help='size of the file in bytes', repr=False)

    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this file in hexadecimal',
        repr=False)

    md5 = String(
        label='MD5 checksum',
        help='MD5 checksum for this file in hexadecimal',
        repr=False)

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this file in hexadecimal',
        repr=False)

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this file in hexadecimal',
        repr=False)

    extra_data = Mapping(
        label='extra data',
        help='A mapping of arbitrary extra file reference data.')

    def update(self, other):
        """
        Fill the empty attributes of this reference with the non-empty values
        of the ``other`` file reference. Attributes of this reference that
        already hold a non-empty value are left alone. Return this reference.
        """
        incoming = other.to_dict()
        for attr_name in incoming:
            new_value = incoming[attr_name]
            # Only copy a value when there is one to copy and nothing to
            # overwrite on our side.
            if new_value and not getattr(self, attr_name, None):
                setattr(self, attr_name, new_value)
        return self
Beispiel #4
0
class DependentPackage(ModelMixin):
    """
    An identifiable dependent package object.
    """

    purl = String(
        repr=True,
        label='Dependent package URL',
        help=(
            'A compact purl package URL. Typically when there is an '
            'unresolved requirement, there is no version. '
            'If the dependency is resolved, the version should be added to '
            'the purl'
        ),
    )

    extracted_requirement = String(
        repr=True,
        label='extracted version requirement',
        help=(
            'String for the original version requirements and constraints. '
            'Package-type specific and as found originally in a datafile.'
        ),
    )

    # ToDo: add `vers` support. See https://github.com/nexB/univers/blob/main/src/univers/version_range.py

    scope = String(
        repr=True,
        label='dependency scope',
        help=(
            'The scope of this dependency, such as runtime, install, etc. '
            'This is package-type specific and is the original scope string.'
        ),
    )

    is_runtime = Boolean(
        default=True,
        label='is runtime flag',
        help='True if this dependency is a runtime dependency.',
    )

    is_optional = Boolean(
        default=False,
        label='is optional flag',
        help='True if this dependency is an optional dependency',
    )

    is_resolved = Boolean(
        default=False,
        label='is resolved flag',
        help=(
            'True if this dependency version requirement has '
            'been resolved and this dependency url points to an '
            'exact version.'
        ),
    )

    resolved_package = Mapping(
        label='resolved package data',
        help=(
            'A mapping of resolved package data for this dependent package, '
            'either from the datafile or collected from another source. Some '
            'lockfiles for Composer or Cargo contain extra dependency data.'
        ),
    )
class TypeDefinition(object):
    """
    A named set of matching criteria: file types, mime types and extensions,
    plus a ``strict`` flag.
    """
    # NOTE(review): presumably consumed by a file-type detector; the consumer
    # is not visible here.
    name = String(repr=True)
    filetypes = List(repr=True)
    mimetypes = List(repr=True)
    extensions = List(repr=True)
    strict = Boolean(
        repr=True,
        help=' if True, all criteria must be matched to select this detector.',
    )
Beispiel #6
0
class DependentPackage(BaseModel):
    """
    An identifiable dependent package object.
    """

    purl = String(
        repr=True,
        label='Dependent package URL',
        help=(
            'A compact purl package URL. Typically when there is an '
            'unresolved requirement, there is no version. '
            'If the dependency is resolved, the version should be added to '
            'the purl'
        ),
    )

    requirement = String(
        repr=True,
        label='dependent package version requirement',
        help=(
            'A string defining version(s)requirements. '
            'Package-type specific.'
        ),
    )

    scope = String(
        repr=True,
        label='dependency scope',
        help=(
            'The scope of this dependency, such as runtime, install, etc. '
            'This is package-type specific and is the original scope string.'
        ),
    )

    is_runtime = Boolean(
        default=True,
        label='is runtime flag',
        help='True if this dependency is a runtime dependency.',
    )

    is_optional = Boolean(
        default=False,
        label='is optional flag',
        help='True if this dependency is an optional dependency',
    )

    is_resolved = Boolean(
        default=False,
        label='is resolved flag',
        help=(
            'True if this dependency version requirement has '
            'been resolved and this dependency url points to an '
            'exact version.'
        ),
    )
class Gem(object):
    """
    A Gem can be packaged as a .gem archive, or it can be a source gem either
    fetched from GIT or SVN or from a local path.
    """
    supported_opts = 'remote', 'ref', 'revision', 'branch', 'submodules', 'tag',

    name = String()
    version = String()

    platform = String(help='Gem platform')

    remote = String(
        help=
        'remote can be a path, git, svn or Gem repo url. One of GEM, PATH, GIT or SVN'
    )

    type = String(
        # validator=choices(GEM_TYPES),
        help='the type of this Gem: One of: {}'.format(', '.join(GEM_TYPES)))
    pinned = Boolean()
    spec_version = String()

    # relative path
    path = String()

    revision = String(
        help='A version control full revision (e.g. a Git commit hash).')

    ref = String(
        help=
        'A version control ref (such as a tag, a shortened revision hash, etc.).'
    )

    branch = String()
    submodules = String()
    tag = String()

    requirements = List(item_type=String,
                        help='list of constraints such as ">= 1.1.9"')

    dependencies = Mapping(
        help='a map of direct dependent Gems, keyed by name',
        value_type='Gem',
    )

    def refine(self):
        """
        Apply some refinements to the Gem based on its type:
         - use the remote as the path for Gems of the PATH type
         - fix version and revisions for Gems checked-out from VCS: the
           original version is saved as the spec_version and the version
           becomes the revision if there is one, or else the ref if there is
           one. The version is left unchanged if there is neither.
        """
        if self.type == PATH:
            self.path = self.remote

        if self.type in (GIT, SVN):
            # FIXME: this likely WRONG
            # TODO: this may not be correct for SVN BUT SVN has been abandoned
            self.spec_version = self.version
            # A full revision always takes precedence over a ref.
            if self.revision:
                self.version = self.revision
            elif self.ref:
                self.version = self.ref

    def as_nv_tree(self):
        """
        Return a tree of name/versions dependency tuples from self as nested
        dicts. The tree root is self. Each key is a name/version tuple.
        Values are dicts.
        """
        root = (self.name, self.version)
        subtree = {}
        for gem in self.dependencies.values():
            subtree.update(gem.as_nv_tree())
        return {root: subtree}

    def flatten(self):
        """
        Return a sorted flattened list of unique parent/child tuples.

        A relationship is unique based on the (type, name, version) of its
        parent and of its child Gem.
        """
        # NOTE(review): sorting tuples of Gem objects requires Gem instances
        # to be orderable; this is presumably provided by a class decorator
        # that is not visible here - confirm.
        flattened = []
        seen = set()

        def add_relationship(parent, child):
            # Track a parent/child pair only the first time it is seen.
            rel_key = (
                (parent.type, parent.name, parent.version),
                (child.type, child.name, child.version),
            )
            if rel_key not in seen:
                seen.add(rel_key)
                flattened.append((parent, child))

        for gem in self.dependencies.values():
            add_relationship(self, gem)
            for parent, child in gem.flatten():
                add_relationship(parent, child)
        return sorted(flattened)

    def dependency_tree(self):
        """
        Return a tree of dependencies as nested mappings.
        Each key is a "name@version" string and values are dicts.
        """
        root = '{}@{}'.format(self.name or '', self.version or '')
        subtree = {}
        for gem in self.dependencies.values():
            subtree.update(gem.dependency_tree())
        return {root: subtree}

    def to_dict(self):
        """
        Return a native mapping for this Gem. The dependencies are returned as
        a dependency_tree() of nested mappings.
        """
        return {
            'name': self.name,
            'version': self.version,
            'platform': self.platform,
            'pinned': self.pinned,
            'remote': self.remote,
            'type': self.type,
            'path': self.path,
            'revision': self.revision,
            'ref': self.ref,
            'branch': self.branch,
            'submodules': self.submodules,
            'tag': self.tag,
            'requirements': self.requirements,
            'dependencies': self.dependency_tree(),
        }

    @property
    def gem_name(self):
        """
        Return the "<name>-<version>.gem" archive file name for this Gem.
        """
        return '{}-{}.gem'.format(self.name, self.version)
class GemDependency(object):
    """
    A Gem dependency with a name and a version.
    """
    name = String()
    version = String()
Beispiel #9
0
class Package(BasePackage):
    """
    A package object as represented by its manifest data.
    """

    # Optional. Public default primary programming language for a package
    # class. Used as the primary_language when none is provided.
    default_primary_language = None

    primary_language = String(
        label='Primary programming language',
        help='Primary programming language',
    )

    description = String(
        label='Description',
        help='Description for this package. '
        'By convention the first should be a summary when available.')

    release_date = Date(label='release date',
                        help='Release date of the package')

    parties = List(
        item_type=Party,
        label='parties',
        help='A list of parties such as a person, project or organization.')

    keywords = List(item_type=str,
                    label='keywords',
                    help='A list of keywords.')

    homepage_url = String(label='homepage URL',
                          help='URL to the homepage for this package.')

    download_url = String(label='Download URL', help='A direct download URL.')

    size = Integer(default=None,
                   label='download size',
                   help='size of the package download in bytes')

    sha1 = String(label='SHA1 checksum',
                  help='SHA1 checksum for this download in hexadecimal')

    md5 = String(label='MD5 checksum',
                 help='MD5 checksum for this download in hexadecimal')

    sha256 = String(label='SHA256 checksum',
                    help='SHA256 checksum for this download in hexadecimal')

    sha512 = String(label='SHA512 checksum',
                    help='SHA512 checksum for this download in hexadecimal')

    bug_tracking_url = String(
        label='bug tracking URL',
        help='URL to the issue or bug tracker for this package')

    code_view_url = String(label='code view URL',
                           help='a URL where the code can be browsed online')

    vcs_url = String(
        help='a URL to the VCS repository in the SPDX form of: '
        'https://github.com/nexb/scancode-toolkit.git@405aaa4b3 '
        'See SPDX specification "Package Download Location" '
        'at https://spdx.org/spdx-specification-21-web-version#h.49x2ik5 ')

    copyright = String(
        label='Copyright',
        help='Copyright statements for this package. Typically one per line.')

    # The help text used to be truncated ("... declared license or ."). It
    # now uses the same wording as the PackageData.license_expression field.
    license_expression = String(
        label='license expression',
        help='The license expression for this package typically derived '
        'from its declared license or from some other type-specific '
        'routine or convention.')

    declared_license = String(
        label='declared license',
        help='The declared license mention, tag or text as found in a '
        'package manifest.')

    notice_text = String(label='notice text',
                         help='A notice text for this package.')

    root_path = String(
        label='package root path',
        help='The path to the root of the package documented in this manifest '
        'if any, such as a Maven .pom or a npm package.json parent directory.')

    dependencies = List(item_type=DependentPackage,
                        label='dependencies',
                        help='A list of DependentPackage for this package. ')

    contains_source_code = TriBoolean(
        label='contains source code',
        help=
        'Flag set to True if this package contains its own source code, None '
        'if this is unknown, False if not.')

    source_packages = List(
        item_type=String,
        label='List of related source code packages',
        help='A list of related  source code Package URLs (aka. "purl") for '
        'this package. For instance an SRPM is the "source package" for a '
        'binary RPM.')

    def __attrs_post_init__(self, *args, **kwargs):
        """
        Set the type and primary_language to their class-level defaults when
        they are empty.
        """
        if not self.type and hasattr(self, 'default_type'):
            self.type = self.default_type

        if not self.primary_language and hasattr(self,
                                                 'default_primary_language'):
            self.primary_language = self.default_primary_language

    @classmethod
    def recognize(cls, location):
        """
        Yield one or more Package objects given a file location pointing to a
        package archive, manifest or similar.

        Sub-classes should override to implement their own package recognition.
        This base implementation raises NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def get_package_root(cls, manifest_resource, codebase):
        """
        Return the Resource for the package root given a `manifest_resource`
        Resource object that represents a manifest in the `codebase` Codebase.

        Each package type and instance have different conventions on how a
        package manifest relates to the root of a package.

        For instance, given a "package.json" file, the root of an npm is the
        parent directory. The same applies with a Maven "pom.xml". In the case
        of a "xyz.pom" file found inside a JAR META-INF/ directory, the root is
        the JAR itself which may not be the direct parent.

        Each package type should subclass as needed. This defaults to
        returning the `manifest_resource` itself.
        """
        return manifest_resource

    @classmethod
    def get_package_resources(cls, package_root, codebase):
        """
        Yield the Resources of a Package starting from `package_root`.

        The `package_root` itself is yielded first unless it is ignored. The
        walk then skips resources for which is_ignored_package_resource()
        returns True.
        """
        if not Package.is_ignored_package_resource(package_root, codebase):
            yield package_root
        yield from package_root.walk(
            codebase,
            topdown=True,
            ignored=Package.is_ignored_package_resource,
        )

    @classmethod
    def ignore_resource(cls, resource, codebase):
        """
        Return True if `resource` should be ignored. This default
        implementation never ignores a resource.
        """
        return False

    @staticmethod
    def is_ignored_package_resource(resource, codebase):
        """
        Return True if any of the package types of packagedcode.PACKAGE_TYPES
        reports that `resource` should be ignored.
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import with the packagedcode package - confirm.
        from packagedcode import PACKAGE_TYPES
        return any(
            pt.ignore_resource(resource, codebase) for pt in PACKAGE_TYPES)

    def compute_normalized_license(self):
        """
        Return a normalized license_expression string using the declared_license
        field. Return 'unknown' if there is a declared license but it cannot be
        detected and return None if there is no declared license.

        This delegates to the module-level compute_normalized_license()
        function.

        Subclasses can override to handle specifics such as supporting specific
        license ids and conventions.
        """
        return compute_normalized_license(self.declared_license)

    @classmethod
    def extra_key_files(cls):
        """
        Return a list of extra key file paths (or path glob patterns) beyond
        standard, well known key files for this Package. List items are strings
        that are either paths or glob patterns and are relative to the package
        root.

        Knowing if a file is a "key-file" file is important for classification
        and summarization. For instance, a JAR can have key files that are not
        top level under the META-INF directory. Or a .gem archive contains a
        metadata.gz file.

        Sub-classes can implement as needed. This default returns an empty
        list.
        """
        return []

    @classmethod
    def extra_root_dirs(cls):
        """
        Return a list of extra package root-like directory paths (or path glob
        patterns) that should be considered to determine if a file is a top
        level file or not. List items are strings that are either paths or glob
        patterns and are relative to the package root.

        Knowing if a file is a "top-level" file is important for classification
        and summarization.

        Sub-classes can implement as needed. This default returns an empty
        list.
        """
        return []
Beispiel #10
0
class BasePackage(BaseModel):
    """
    Base model for a package identified by the discrete attributes of a
    Package URL as specified at https://github.com/package-url/purl-spec.
    """

    # Class-level attributes used to recognize a package.
    filetypes = ()
    mimetypes = ()
    extensions = ()
    # List of known metafiles for a package type.
    metafiles = []

    # Optional. Public default web base URL for package homepages of this
    # package type on the default repository.
    default_web_baseurl = None

    # Optional. Public default download base URL for direct downloads of this
    # package type on the default repository.
    default_download_baseurl = None

    # Optional. Public default API repository base URL for package API calls of
    # this package type on the default repository.
    default_api_baseurl = None

    # Optional. Public default type for a package class.
    default_type = None

    # TODO: add description of the Package type for info
    # type_description = None

    type = String(
        repr=True,
        label='package type',
        help=(
            'Optional. A short code to identify what is the type of this '
            'package. For instance gem for a Rubygem, docker for container, '
            'pypi for Python Wheel or Egg, maven for a Maven Jar, '
            'deb for a Debian package, etc.'
        ),
    )

    namespace = String(
        repr=True,
        label='package namespace',
        help='Optional namespace for this package.',
    )

    name = String(
        repr=True,
        label='package name',
        help='Name of the package.',
    )

    version = String(
        repr=True,
        label='package version',
        help='Optional version of the package as a string.',
    )

    qualifiers = Mapping(
        default=None,
        value_type=str,
        converter=lambda value: normalize_qualifiers(value, encode=False),
        label='package qualifiers',
        help='Optional mapping of key=value pairs qualifiers for this package',
    )

    subpath = String(
        label='extra package subpath',
        help=(
            'Optional extra subpath inside a package and relative to the root '
            'of this package'
        ),
    )

    def __attrs_post_init__(self, *args, **kwargs):
        """
        Use the class-level default_type as type when no type was provided.
        """
        if not self.type and hasattr(self, 'default_type'):
            self.type = self.default_type

    @property
    def purl(self):
        """
        Return a compact purl package URL string, or None if this package has
        no name.
        """
        if not self.name:
            return None
        package_url = PackageURL(
            self.type,
            self.namespace,
            self.name,
            self.version,
            self.qualifiers,
            self.subpath,
        )
        return package_url.to_string()

    def repository_homepage_url(self, baseurl=default_web_baseurl):
        """
        Return the URL to the page for this package in its package repository.
        This is typically different from the package homepage URL proper.
        This base implementation returns None: subclasses should override to
        provide a proper value.
        """
        return None

    def repository_download_url(self, baseurl=default_download_baseurl):
        """
        Return the package repository URL to download the actual archive of
        code of this package. This may be different from the actual download
        URL and is computed from the default public repository baseurl.
        This base implementation returns None: subclasses should override to
        provide a proper value.
        """
        return None

    def api_data_url(self, baseurl=default_api_baseurl):
        """
        Return the package repository API URL to obtain structured data for
        this package such as the URL to a JSON or XML api.
        This base implementation returns None: subclasses should override to
        provide a proper value.
        """
        return None

    def set_purl(self, package_url):
        """
        Update this Package object with the attributes of the `package_url`
        purl string or PackageURL object. Only the attributes that are empty
        on this Package are updated. Do nothing if `package_url` is empty.
        """
        if not package_url:
            return

        purl = package_url
        if not isinstance(purl, PackageURL):
            purl = PackageURL.from_string(purl)

        purl_fields = (
            'type',
            'namespace',
            'name',
            'version',
            'qualifiers',
            'subpath',
        )
        for field_name in purl_fields:
            existing = getattr(self, field_name)
            incoming = getattr(purl, field_name)
            # never overwrite a value that is already set on this package
            if incoming and not existing:
                setattr(self, field_name, incoming)

    def to_dict(self, **kwargs):
        """
        Return a dict of primitive Python types.

        The computed purl and repository URLs are included unless a true
        `exclude_properties` keyword argument is provided.
        """
        data = attr.asdict(self, dict_factory=dict)

        with_properties = not kwargs.get('exclude_properties')
        if with_properties:
            data['purl'] = self.purl
            data['repository_homepage_url'] = self.repository_homepage_url()
            data['repository_download_url'] = self.repository_download_url()
            data['api_data_url'] = self.api_data_url()

        if self.qualifiers:
            data['qualifiers'] = normalize_qualifiers(
                self.qualifiers, encode=False)

        return data

    @classmethod
    def create(cls, ignore_unknown=True, **kwargs):
        """
        Return a Package built from kwargs.
        Optionally `ignore_unknown` attributes provided in `kwargs`.

        The class of the returned Package is selected with
        packagedcode.get_package_class() using this class as a default.
        """
        from packagedcode import get_package_class

        package_class = get_package_class(kwargs, default=cls)
        parent = super(BasePackage, package_class)
        return parent.create(ignore_unknown=ignore_unknown, **kwargs)
Beispiel #11
0
class PackageData(IdentifiablePackageData):
    """
    The data of a given package type. This is the core model to store normalized
    package data parsed from package datafiles (such as a manifest) or stored in
    a top-level package.
    """

    primary_language = String(
        label='Primary programming language',
        help='Primary programming language',
    )

    description = String(
        label='Description',
        help='Description for this package. '
        'By convention the first should be a summary when available.')

    release_date = Date(label='release date',
                        help='Release date of the package')

    parties = List(
        item_type=Party,
        label='parties',
        help='A list of parties such as a person, project or organization.')

    keywords = List(item_type=str,
                    label='keywords',
                    help='A list of keywords.')

    homepage_url = String(label='homepage URL',
                          help='URL to the homepage for this package.')

    download_url = String(label='Download URL', help='A direct download URL.')

    size = Integer(default=None,
                   label='download size',
                   help='size of the package download in bytes')

    sha1 = String(
        label='SHA1 checksum',
        help='SHA1 checksum for this package download in hexadecimal')

    md5 = String(label='MD5 checksum',
                 help='MD5 checksum for this package download in hexadecimal')

    sha256 = String(
        label='SHA256 checksum',
        help='SHA256 checksum for this package download in hexadecimal')

    sha512 = String(
        label='SHA512 checksum',
        help='SHA512 checksum for this package download in hexadecimal')

    bug_tracking_url = String(
        label='bug tracking URL',
        help='URL to the issue or bug tracker for this package')

    code_view_url = String(label='code view URL',
                           help='a URL where the code can be browsed online')

    vcs_url = String(
        help='a URL to the VCS repository in the SPDX form of: '
        'https://github.com/nexb/scancode-toolkit.git@405aaa4b3 '
        'See SPDX specification "Package Download Location" '
        'at https://spdx.org/spdx-specification-21-web-version#h.49x2ik5 ')

    copyright = String(
        label='Copyright',
        help='Copyright statements for this package. Typically one per line.')

    license_expression = String(
        label='license expression',
        help='The license expression for this package typically derived '
        'from its declared license or from some other type-specific '
        'routine or convention.')

    declared_license = String(
        label='declared license',
        help='The declared license mention, tag or text as found in a '
        'package manifest. This can be a string, a list or dict of '
        'strings possibly nested, as found originally in the manifest.')

    notice_text = String(label='notice text',
                         help='A notice text for this package.')

    source_packages = List(
        item_type=str,
        label='List of related source code package purls',
        help='A list of related  source code Package URLs (aka. "purl") for '
        'this package. For instance an SRPM is the "source package" for a '
        'binary RPM.')

    file_references = List(
        item_type=FileReference,
        label='referenced files',
        help='List of file paths and details for files referenced in a package '
        'manifest. These may not actually exist on the filesystem. '
        'The exact semantics and base of these paths is specific to a '
        'package type or datafile format.')

    extra_data = Mapping(
        label='extra data',
        help='A mapping of arbitrary extra package data.',
    )

    dependencies = List(item_type=DependentPackage,
                        label='dependencies',
                        help='A list of DependentPackage for this package.')

    repository_homepage_url = String(
        label='package repository homepage URL.',
        help='URL to the page for this package in its package repository. '
        'This is typically different from the package homepage URL proper.')

    repository_download_url = String(
        label='package repository download URL.',
        help='download URL to download the actual archive of code of this '
        'package in its package repository. '
        'This may be different from the actual download URL.')

    api_data_url = String(
        label='package repository API URL.',
        help='API URL to obtain structured data for this package such as the '
        'URL to a JSON or XML api its package repository.')

    datasource_id = String(
        label='datasource id',
        help='Datasource identifier for the source of these package data.',
        repr=True,
    )

    def to_dict(self, with_details=True, **kwargs):
        """
        Return a mapping of primitive Python types for this PackageData.

        When ``with_details`` is False, the "file_references", "dependencies"
        and "datasource_id" items are removed from the returned mapping.
        """
        mapping = super().to_dict(with_details=with_details, **kwargs)
        if not with_details:
            # these are not used in the Package subclass
            mapping.pop('file_references', None)
            mapping.pop('dependencies', None)
            mapping.pop('datasource_id', None)

        return mapping

    @classmethod
    def from_dict(cls, mapping):
        """
        Return an instance of PackageData built from a ``mapping`` native Python
        data. Known attributes that store a list of objects are also
        "rehydrated" (such as models.Party).

        Unknown attributes provided in ``mapping`` that do not exist as fields
        in the class are kept as items in the extra_data mapping. An Exception
        is raised if an "unknown attribute" name already exists as an extra_data
        name.

        Empty values and computed attributes (such as the purl) are skipped.
        The ``mapping`` and its nested "extra_data" are never modified.
        """
        # TODO: consider using a proper library for this such as cattrs,
        # marshmallow, etc. or use the field type that we declare.

        # these are computed attributes serialized on a package
        # that should not be recreated when de-serializing
        computed_attributes = {'purl'}

        fields_by_name = attr.fields_dict(cls)

        # Work on a copy: unknown attributes are added to this mapping and the
        # caller's data must not be modified as a side effect.
        extra_data = dict(mapping.get('extra_data') or {})
        package_data = {}

        # Each of these are lists of class instances tracked here, which are
        # stored as a list of mappings in scan_data
        list_fields_by_item = {
            'parties': Party,
            'dependencies': DependentPackage,
            'file_references': FileReference,
        }

        for name, value in mapping.items():
            if not value:
                continue

            # extra_data is handled separately: it is stored once after the
            # loop, together with any unknown attribute collected here.
            if name in computed_attributes or name == 'extra_data':
                continue

            if name not in fields_by_name:
                # keep unknown fields as extra data
                if name in extra_data:
                    raise Exception(
                        f'Invalid package "scan_data" with duplicated name: {name!r}={value!r} '
                        f'present both as attribute AND as extra_data: {name!r}={extra_data[name]!r}'
                    )
                extra_data[name] = value
                continue

            list_item_type = list_fields_by_item.get(name)
            if list_item_type:
                # re-hydrate lists of typed objects
                package_data[name] = list(
                    _rehydrate_list(cls=list_item_type, values=value))
            else:
                # this is a plain, non-nested field
                package_data[name] = value

        # Always store the collected extra data. This used to be lost when the
        # mapping had no (or an empty) "extra_data" of its own.
        if extra_data:
            package_data['extra_data'] = extra_data

        return super().from_dict(package_data)
Beispiel #12
0
class Dependency(DependentPackage):
    """
    Top-level dependency instance from parsed package data collected from data
    files such as a package manifest or lockfile.
    """
    dependency_uid = String(
        label='Dependency unique id',
        help='A unique identifier for this dependency instance.'
        'Consists of the dependency purl with a UUID qualifier.')

    # TODO: should we also repeat the purl here: this may be redundant but this
    # would help avoid lookups
    for_package_uid = String(
        label='A Package unique id',
        help='The unique id of the package instance to which this dependency '
        'file belongs. This is the purl with a uuid qualifier.')

    datafile_path = String(
        label='Path to datafile.',
        help='A POSIX path string to the package datafile that describes this '
        'dependency.')

    datasource_id = String(
        label='datasource id',
        help='Datasource identifier for the source of these package data.')

    def __attrs_post_init__(self, *args, **kwargs):
        # Only build a uid from the purl when none was provided, such that an
        # existing uid is kept as-is.
        if not self.dependency_uid:
            self.dependency_uid = build_package_uid(self.purl)

    @classmethod
    def from_dependent_package(
        cls,
        dependent_package,
        datafile_path,
        datasource_id,
        package_uid=None,
    ):
        """
        Return a Dependency from a ``dependent_package`` DependentPackage object
        or mapping, recording the ``datafile_path``, ``datasource_id`` and
        ``package_uid`` on the new object. The provided ``dependent_package``
        is not modified.
        """
        if isinstance(dependent_package, DependentPackage):
            dependent_package = dependent_package.to_dict()
        else:
            # make a copy
            dependent_package = dict(dependent_package)

        dependent_package['datafile_path'] = datafile_path
        dependent_package['datasource_id'] = datasource_id
        dependent_package['for_package_uid'] = package_uid

        return cls.from_dict(dependent_package)

    @classmethod
    def from_dependent_packages(
        cls,
        dependent_packages,
        datafile_path,
        datasource_id,
        package_uid=None,
    ):
        """
        Yield Dependency objects from a ``dependent_packages`` list of
        DependentPackage object or mappings found in the ``datafile_path`` with
        ``datasource_id`` for the ``package_uid``.

        Items without a purl are skipped. An empty or None
        ``dependent_packages`` yields nothing.
        """
        for dependent_package in dependent_packages or []:
            # A mapping has no ``purl`` attribute: look the purl up by key so
            # that mappings are supported here as in from_dependent_package().
            if isinstance(dependent_package, DependentPackage):
                purl = dependent_package.purl
            else:
                purl = dependent_package.get('purl')

            if not purl:
                if TRACE:
                    logger_debug(
                        f' Dependency.from_dependent_packages: dependent_package (does not have purl): {dependent_package}'
                    )
                continue

            yield Dependency.from_dependent_package(
                dependent_package=dependent_package,
                datafile_path=datafile_path,
                datasource_id=datasource_id,
                package_uid=package_uid,
            )
Beispiel #13
0
class IdentifiablePackageData(ModelMixin):
    """
    Identifiable package data object using purl as identifying attribute as
    specified here https://github.com/package-url/purl-spec.
    This base class is used for all package-like objects be they a manifest
    or an actual package instance.
    """
    type = String(
        repr=True,
        label='package type',
        help='A short code to identify what is the type of this '
        'package. For instance gem for a Rubygem, docker for container, '
        'pypi for Python Wheel or Egg, maven for a Maven Jar, '
        'deb for a Debian package, etc.')

    namespace = String(repr=True,
                       label='package namespace',
                       help='Namespace for this package.')

    name = String(repr=True, label='package name', help='Name of the package.')

    version = String(repr=True,
                     label='package version',
                     help='Version of the package as a string.')

    qualifiers = Mapping(
        default=None,
        value_type=str,
        converter=lambda v: normalize_qualifiers(v, encode=False),
        label='package qualifiers',
        help='Mapping of key=value pairs qualifiers for this package')

    subpath = String(label='extra package subpath',
                     help='Subpath inside a package and relative to the root '
                     'of this package')

    @property
    def purl(self):
        """
        Return a compact Package URL string or None.

        None is returned when this object has no name.
        """
        if self.name:
            return PackageURL(
                type=self.type,
                namespace=self.namespace,
                name=self.name,
                version=self.version,
                qualifiers=self.qualifiers,
                subpath=self.subpath,
            ).to_string()

    def set_purl(self, package_url):
        """
        Update this object with the ``package_url`` purl string or PackageURL if
        there is no pre-existing value for a given purl attribute.

        Do nothing if ``package_url`` is empty. Existing non-empty attribute
        values are never overwritten.
        """
        if not package_url:
            return

        if not isinstance(package_url, PackageURL):
            package_url = PackageURL.from_string(package_url)

        for key, value in package_url.to_dict().items():
            # Set the attribute named by ``key``: only fill in attributes that
            # are empty here and have a value in the purl.
            if not getattr(self, key) and value:
                setattr(self, key, value)

    def to_dict(self, **kwargs):
        """
        Return a mapping of this object data, with an added computed "purl"
        and with normalized, non-encoded "qualifiers" when there are any.
        """
        mapping = super().to_dict(**kwargs)
        mapping['purl'] = self.purl

        if self.qualifiers:
            mapping['qualifiers'] = normalize_qualifiers(
                qualifiers=self.qualifiers,
                encode=False,
            )

        return mapping
Beispiel #14
0
class Package(PackageData):
    """
    Top-level package instance assembled from parsed package data collected
    from one or more data files such as manifests or lockfiles.
    """

    package_uid = String(label='Package unique id',
                         help='A unique identifier for this package instance.'
                         'Consists of the package purl with a UUID qualifier.')

    datafile_paths = List(
        item_type=str,
        label='List of datafile paths',
        help='List of datafile paths used to create this package.')

    datasource_ids = List(
        item_type=str,
        label='datasource ids',
        help='List of the datasource ids used to create this package.')

    def __attrs_post_init__(self, *args, **kwargs):
        # Only build a uid from the purl when none was provided, such that an
        # existing uid is kept as-is.
        if not self.package_uid:
            self.package_uid = build_package_uid(self.purl)

    def to_dict(self):
        """
        Return a mapping of this package data, without details.
        """
        return super().to_dict(with_details=False)

    @classmethod
    def from_package_data(cls, package_data, datafile_path):
        """
        Return a Package from a ``package_data`` PackageData object
        or mapping. Or None.

        None is returned when ``package_data`` is empty (such as None).
        Raise an Exception if ``package_data`` is a non-empty value that is
        neither a PackageData nor a dict.
        """
        if isinstance(package_data, PackageData):
            package_data_mapping = package_data.to_dict()
            dsid = package_data.datasource_id
        elif isinstance(package_data, dict):
            # make a copy
            package_data_mapping = dict(package_data)
            dsid = package_data['datasource_id']
        elif package_data:
            raise Exception(f'Invalid type: {package_data!r}', package_data)
        else:
            # Nothing to build a Package from: return None as documented
            # rather than failing below on an unbound mapping.
            return None

        package_data_mapping['datafile_paths'] = [datafile_path]
        package_data_mapping['datasource_ids'] = [dsid]

        return cls.from_dict(package_data_mapping)

    @classmethod
    def from_dict(cls, mapping):
        """
        Return an instance of Package built from a ``mapping`` of native Python
        data, typically a PackageData-like ``mapping``. Return None if there are
        not enough data to form a PackageURL from this data.

        See PackageData.from_dict() for other details.
        """
        if build_purl(mapping):
            return super().from_dict(mapping)

    def is_compatible(self, package_data, include_qualifiers=True):
        """
        Return True if the ``package_data`` PackageData is compatible with
        this Package, e.g. it is about the same package.

        The type, namespace, name, version, subpath and primary_language must
        be equal. The qualifiers must also be equal, unless
        ``include_qualifiers`` is False in which case they are ignored.
        """
        return (self.type == package_data.type
                and self.namespace == package_data.namespace
                and self.name == package_data.name
                and self.version == package_data.version
                # Qualifiers only count when requested: with
                # include_qualifiers=False this clause must be True and must
                # not make the whole comparison fail.
                and (not include_qualifiers
                     or self.qualifiers == package_data.qualifiers)
                and self.subpath == package_data.subpath
                and self.primary_language == package_data.primary_language)

    def update(self, package_data, datafile_path, replace=False):
        """
        Update this Package with data from the ``package_data`` PackageData.

        If a field does not have a value and the ``package_data`` field has a
        value, set this package field to the ``package_data`` field value.

        If there is a value on both side, update the value according to the
        ``replace`` flag.

        If ``replace`` is True, replace a value with the ``package_data`` value.
        Otherwise existing, non-empty values are left unchanged.

        List of values are merged, keeping the original order and avoiding duplicates.

        Return True if update is successful.

        Return False if there is a type, name or version mismatch between this
        package and the provided ``package_data``

        Return None, without any update, if ``package_data`` is empty.
        """
        if not package_data:
            return

        # Qualifiers are ignored on purpose when checking for compatibility.
        if not self.is_compatible(package_data, include_qualifiers=False):
            if TRACE_UPDATE:
                logger_debug(
                    f'update: {self.purl} not compatible with: {package_data.purl}'
                )
            return False

        # always append these new items
        self.datasource_ids.append(package_data.datasource_id)
        self.datafile_paths.append(datafile_path)

        # NOTE(review): to_package_data() is not defined in this class; it is
        # presumably provided by PackageData: confirm.
        existing = self.to_package_data().to_dict()
        new_package_data = package_data.to_dict()

        # update for these means combining lists of items from both sides
        list_fields = set([
            'parties',
            'dependencies',
            'file_references',
        ])

        for name, value in existing.items():
            new_value = new_package_data.get(name)

            if TRACE_UPDATE:
                logger_debug(
                    f'update: {name!r}={value!r} with new_value: {new_value!r}'
                )

            if not new_value:
                if TRACE_UPDATE:
                    logger_debug('  No new value: skipping')
                continue

            if not value:
                if TRACE_UPDATE:
                    logger_debug('  set existing value to new')
                setattr(self, name, new_value)
                continue

            if replace:
                if TRACE_UPDATE:
                    logger_debug('  replace existing value to new')
                setattr(self, name, new_value)
                continue

            # here we do not replace... but we still merge lists/mappings
            if name == 'extra_data':
                # NOTE(review): ``value`` comes from the to_dict() mapping
                # above; this merge reaches self.extra_data only if to_dict()
                # does not copy nested mappings: confirm.
                value.update(new_value)

            if name in list_fields:
                if TRACE_UPDATE:
                    logger_debug('  merge lists of values')
                merged = merge_sequences(list1=value, list2=new_value)
                setattr(self, name, merged)

            elif TRACE_UPDATE and value != new_value:
                logger_debug('  skipping update: no replace')

        return True
Beispiel #15
0
class DebianPackage(models.Package):
    """
    A Debian package, using "deb" as its default package type.
    """
    # File name patterns, file extensions, file types and mime types declared
    # for Debian packages.
    # NOTE(review): these are presumably consumed by the package recognition
    # code of the base models: confirm.
    metafiles = ('*.control', )
    extensions = ('.deb', )
    filetypes = ('debian binary package', )
    mimetypes = (
        'application/x-archive',
        'application/vnd.debian.binary-package',
    )
    default_type = 'deb'

    # This field and installed_files below are only included in the to_dict()
    # output of this class when it is called with _detailed=True.
    multi_arch = String(label='Multi-Arch',
                        help='Multi-Arch value from status file')

    installed_files = List(item_type=models.PackageFile,
                           label='installed files',
                           help='List of files installed by this package.')

    def to_dict(self, _detailed=False, **kwargs):
        """
        Return a mapping of this package data built with
        models.Package.to_dict() called with the ``kwargs``.

        If ``_detailed`` is True, the mapping has a "multi_arch" value and an
        "installed_files" list of installed file mappings. Otherwise, these
        two temporary fields are removed from the mapping.
        """
        mapping = models.Package.to_dict(self, **kwargs)

        if not _detailed:
            # remove temporary fields
            for temporary_field in ('multi_arch', 'installed_files'):
                mapping.pop(temporary_field, None)
            return mapping

        # include temporary fields
        mapping['multi_arch'] = self.multi_arch
        mapping['installed_files'] = [
            installed.to_dict() for installed in self.installed_files or []
        ]
        return mapping

    def populate_installed_files(self, var_lib_dpkg_info_dir):
        """
        Set the installed_files attribute to the list of files returned by
        get_list_of_installed_files() for the `var_lib_dpkg_info_dir` path to
        a Debian /var/lib/dpkg/info directory.
        """
        files = self.get_list_of_installed_files(var_lib_dpkg_info_dir)
        self.installed_files = files

    def get_list_of_installed_files(self, var_lib_dpkg_info_dir):
        """
        Return a list of models.PackageFile given a `var_lib_dpkg_info_dir`
        path to a Debian /var/lib/dpkg/info directory where <package>.list
        and/or <package>.md5sums files can be found for this package name.

        The .md5sums file is used when it exists, and the .list file is used
        otherwise. Return an empty list if none of these files exists.
        Paths that are the parent directory of another listed path are
        skipped: the .list files also contain directories.
        """

        def as_rooted(path):
            # paths are returned stripped and with a leading slash
            path = path.strip()
            return path if path.startswith('/') else '/' + path

        # Multi-Arch can be: foreign, same, allowed or empty
        # Only `same` adds an :<arch> suffix to the info file names
        arch_suffix = ''
        if self.multi_arch == 'same':
            arch_suffix = ':{}'.format(self.qualifiers.get('arch'))

        md5sum_file = os.path.join(
            var_lib_dpkg_info_dir,
            '{}{}.md5sums'.format(self.name, arch_suffix),
        )
        list_file = os.path.join(
            var_lib_dpkg_info_dir,
            '{}{}.list'.format(self.name, arch_suffix),
        )

        has_md5 = os.path.exists(md5sum_file)
        has_list = os.path.exists(list_file)

        if not has_md5 and not has_list:
            return []

        collected = []
        parent_dirs = set()

        if has_md5:
            with open(md5sum_file) as md5sums:
                for entry in md5sums:
                    entry = entry.strip()
                    if not entry:
                        continue

                    checksum, _, location = entry.partition(' ')
                    location = as_rooted(location)

                    # we ignore dirs in general, and we ignore these that would
                    # be created a plain dir when we can
                    if location in ignored_root_dirs:
                        continue

                    collected.append(
                        models.PackageFile(path=location, md5=checksum.strip())
                    )
                    parent_dirs.add(os.path.dirname(location))

        else:
            # there is no md5sums file here, so the list file exists
            with open(list_file) as listing:
                for entry in listing:
                    entry = entry.strip()
                    if not entry:
                        continue

                    location = as_rooted(entry)

                    # we ignore dirs in general, and we ignore these that would
                    # be created a plain dir when we can
                    if location in ignored_root_dirs:
                        continue

                    # a list file has no checksum
                    candidate = models.PackageFile(path=location, md5=None)
                    if candidate not in collected:
                        collected.append(candidate)
                    parent_dirs.add(os.path.dirname(location))

        # skip directories when possible
        return [
            package_file for package_file in collected
            if package_file.path not in parent_dirs
        ]

    def get_copyright_file_path(self, root_dir):
        """
        Given a root_dir path to a filesystem root, return the path to a copyright file
        for this Package. Return None if no copyright file exists for any of
        the candidate names.
        """
        # We start by looking for a copyright file stored in a directory named after the
        # package name. Otherwise we look for a copyright file stored in a source package
        # name.
        candidate_names = [self.name]
        candidate_names.extend(
            PackageURL.from_string(sp).name
            for sp in self.source_packages or []
        )

        for name in candidate_names:
            # Only the fixed relative path is formatted with the name: the
            # root_dir is joined afterwards such that any curly brace in
            # root_dir is kept verbatim and cannot break the formatting.
            copyright_loc = os.path.join(
                root_dir,
                'usr/share/doc/{}/copyright'.format(name),
            )
            if os.path.exists(copyright_loc):
                return copyright_loc