Ejemplo n.º 1
0
def dirname_handler(value, **kwargs):
    """
    Return a mapping of {'file_references': <list of FileReference dicts>}.

    ``value`` is the indexable collection of directory names. Each item of the
    ``current_filerefs`` list found in ``kwargs`` is unpacked as a
    (dirindex, checksum, basename) triple: the dirindex (converted with
    ``int``) selects the directory in ``value`` and the path of the file
    reference is that directory joined with the basename.

    When ``infer_digest_algo`` returns a non-empty name for the checksum, the
    checksum is set on the file reference as the attribute of that name.

    A missing or empty ``current_filerefs`` gives an empty list.
    """
    file_references = []
    current_filerefs = kwargs.get('current_filerefs') or []
    for dirindexes, checksum, basename in current_filerefs:
        dirname = value[int(dirindexes)]
        # TODO: review this. Empty filename does not make sense, unless these
        # are directories that we might ignore OK.

        # There are cases where the basename entry is "</string>", which
        # arrives here as None: posixpath.join() fails on None, so use an
        # empty string instead.
        if basename is None:
            basename = ''

        file_reference = models.FileReference(
            path=posixpath.join(dirname, basename),
            # TODO: add size and fileclass as extra data
        )

        # TODO: we could/should use instead the filedigestalgo RPM tag
        algo = infer_digest_algo(checksum)
        if algo:
            setattr(file_reference, algo, checksum)
        file_references.append(file_reference)

    return {'file_references': [fr.to_dict() for fr in file_references]}
Ejemplo n.º 2
0
def parse_debian_files_list(location, datasource_id, package_type):
    """
    Yield PackageData from a list of file paths at ``location`` such as a
    Debian installed .list or .md5sums file.

    The package name is the base name of ``location``; when that base name
    contains a colon, the part after the first colon is stored as the ``arch``
    qualifier and the part before it is the name.

    Blank lines and lines starting with '#' are skipped, as are paths found in
    ``ignored_root_dirs``. Nothing is yielded when no file reference remains.
    """
    qualifiers = {}
    filename = fileutils.file_base_name(location)
    name, colon, arch = filename.partition(':')
    if colon:
        qualifiers['arch'] = arch

    file_references = []
    with open(location) as info_file:
        for line in info_file:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            md5sum, separator, path = line.partition(' ')
            if not separator or line.startswith('/'):
                # Plain file list: the line is only a path and there is no
                # md5sum. Without this, partition() would put the whole line
                # in md5sum and leave the path empty. A checksum never starts
                # with a slash, so such a line is a path even when it contains
                # spaces.
                md5sum = None
                path = line
            else:
                path = path.strip()
                md5sum = md5sum.strip() or None

            # we ignore dirs in general, and we ignore these that would
            # be created a plain dir when we can
            if path in ignored_root_dirs:
                continue

            ref = models.FileReference(path=path, md5=md5sum)
            file_references.append(ref)

    if not file_references:
        return

    yield models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=name,
        qualifiers=qualifiers,
        file_references=file_references,
    )
Ejemplo n.º 3
0
def get_installed_dotnet_versions_from_regtree(
    registry_tree,
    datasource_id,
    package_type,
):
    """
    Yield one PackageData per installed .NET framework version found in a
    Windows ``registry_tree``.

    Only the entries whose ``path`` ends with a ``\\Full`` segment are used:
    the ``Version`` value of such an entry is the package version and its
    ``InstallPath`` values become file references. An empty or missing
    ``registry_tree`` yields nothing.
    """
    for regentry in registry_tree or ():
        regpath = regentry.get('path', '')
        if not regpath.endswith('\\Full'):
            # not a .NET version entry
            continue

        dotnet_version = None
        install_refs = []
        for regvalue in regentry.get('values', []):
            value_name = regvalue.get('name')
            value_data = regvalue.get('value')

            if value_name == 'Version':
                dotnet_version = value_data
            elif value_name == 'InstallPath':
                install_refs.append(models.FileReference(path=value_data))

        yield models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
            name='microsoft-dot-net-framework',
            version=dotnet_version,
            file_references=install_refs,
        )
Ejemplo n.º 4
0
    def parse(cls, location):
        """
        Yield one or more Package manifest objects given a file ``location`` pointing to a
        package archive, manifest or similar.

        The file is read as UTF-8 YAML. The package type is the first available
        of the ``type`` field, the type of the ``purl`` field, and
        ``cls.default_package_type``; the namespace is the ``namespace`` field
        or else the namespace of the ``purl`` field.

        An ``owner`` field, when present and not empty, is reported as a
        single person party with the 'owner' role. An ``about_resource`` field
        is reported as a file reference.
        """
        with io.open(location, encoding='utf-8') as loc:
            package_data = saneyaml.load(loc.read())

        # About files can contain any purl and also have a namespace
        about_type = package_data.get('type')
        about_ns = package_data.get('namespace')
        purl_type = None
        purl_ns = None
        purl = package_data.get('purl')
        if purl:
            purl = PackageURL.from_string(purl)
            if purl:
                purl_type = purl.type
                # keep the purl namespace too, as the fallback for a missing
                # explicit namespace
                purl_ns = purl.namespace

        package_type = about_type or purl_type or cls.default_package_type
        package_ns = about_ns or purl_ns

        name = package_data.get('name')
        version = package_data.get('version')

        homepage_url = package_data.get('home_url') or package_data.get(
            'homepage_url')
        download_url = package_data.get('download_url')
        copyright_statement = package_data.get('copyright')

        license_expression = package_data.get('license_expression')
        declared_license = license_expression

        # Only report an owner party when there is an owner: a missing owner
        # must not become a party named 'None'.
        parties = []
        owner = package_data.get('owner')
        if owner:
            if not isinstance(owner, str):
                # non-string owner values are kept as their repr
                owner = repr(owner)
            parties.append(
                models.Party(type=models.party_person, name=owner, role='owner')
            )

        # FIXME: also include notice_file and license_file(s) as file_references
        file_references = []
        about_resource = package_data.get('about_resource')
        if about_resource:
            file_references.append(models.FileReference(path=about_resource))

        # FIXME: we should put the unprocessed attributes in extra data
        yield models.PackageData(
            datasource_id=cls.datasource_id,
            type=package_type,
            namespace=package_ns,
            name=name,
            version=version,
            declared_license=declared_license,
            license_expression=license_expression,
            copyright=copyright_statement,
            parties=parties,
            homepage_url=homepage_url,
            download_url=download_url,
            file_references=file_references,
        )
Ejemplo n.º 5
0
    def parse(cls, location):
        """
        Yield a single PackageData for the npm lockfile at ``location``.

        The file is read as UTF-8 JSON. The yielded top-level package carries
        one resolved DependentPackage per entry of the lockfile dependencies
        mapping: the ``dependencies`` key for a ``lockfileVersion`` of 1 (also
        the default when the field is absent) and the ``packages`` key
        otherwise. Each dependency embeds its resolved package as a mapping,
        which in turn lists its own unresolved sub-dependencies.
        """
        with io.open(location, encoding='utf-8') as loc:
            package_data = json.load(loc)

        # we have two formats: v1 and v2
        lockfile_version = package_data.get('lockfileVersion', 1)
        # a lockfile may have no top-level name: use an empty string rather
        # than failing on None below
        root_name = package_data.get('name') or ''
        root_version = package_data.get('version')
        root_ns, _, root_name = root_name.rpartition('/')

        extra_data = dict(lockfile_version=lockfile_version)
        # this is the top level element that we return
        root_package_data = models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            namespace=root_ns,
            name=root_name,
            version=root_version,
            extra_data=extra_data,
            **get_urls(root_ns, root_name, root_version)
        )

        # https://docs.npmjs.com/cli/v8/configuring-npm/package-lock-json#lockfileversion
        if lockfile_version == 1:
            deps_key = 'dependencies'
        else:
            # v2 and may be v3???
            deps_key = 'packages'

        deps_mapping = package_data.get(deps_key) or {}

        dependencies = []

        for dep, dep_data in deps_mapping.items():
            is_dev = dep_data.get('dev', False)
            is_optional = dep_data.get('optional', False)
            is_devoptional = dep_data.get('devOptional', False)
            if is_dev or is_devoptional:
                # development dependencies are always reported as optional
                is_runtime = False
                is_optional = True
                scope = 'devDependencies'
            else:
                is_runtime = True
                scope = 'dependencies'

            # NOTE: in v2 format the first dep is the same as the top level
            # package and has no name (an empty key). It is not special-cased
            # and is reported as a dependency like any other entry.

            # only present for first top level
            # otherwise get name from dep
            name = dep_data.get('name')
            if not name:
                # the name is the last segment as the dep can be:
                # "node_modules/ansi-align/node_modules/ansi-regex"
                # When there is no "node_modules/" segment, rpartition()
                # returns the whole dep as its last item.
                _, _, name = dep.rpartition('node_modules/')
            ns, _, name = name.rpartition('/')
            version = dep_data.get('version')

            dep_purl = PackageURL(
                type=cls.default_package_type,
                namespace=ns,
                name=name,
                version=version,
            ).to_string()

            dependency = models.DependentPackage(
                purl=dep_purl,
                extracted_requirement=version,
                scope=scope,
                is_runtime=is_runtime,
                is_optional=is_optional,
                is_resolved=True,
            )

            # only seen in v2 for the top level package... but good to keep
            declared_license = dep_data.get('license')

            # URLs and checksums
            misc = get_urls(ns, name, version)
            resolved = dep_data.get('resolved')
            misc.update(get_checksum_and_url(resolved).items())
            integrity = dep_data.get('integrity')
            misc.update(get_algo_hexsum(integrity).items())

            resolved_package = models.PackageData(
                datasource_id=cls.datasource_id,
                type=cls.default_package_type,
                primary_language=cls.default_primary_language,
                namespace=ns,
                name=name,
                version=version,
                declared_license=declared_license,
                **misc,
            )
            # these are paths to the root of the installed package in v2
            if dep:
                # a plain list: a trailing comma here would wrap it in a tuple
                resolved_package.file_references = [models.FileReference(path=dep)]

            # v1 has "requires" as name/constraint pairs and "dependencies" as
            # further nested dependencies.
            # v2 has "dependencies" as name/constraint pairs like v1 requires.
            # The v2 "devDependencies", "optionalDependencies", "engines" and
            # "funding" are ignored for now.
            if lockfile_version == 1:
                subdeps_data = dep_data.get('requires')
            else:
                subdeps_data = dep_data.get('dependencies')
            subdeps_data = subdeps_data or {}

            sub_deps = []
            for subdep, subdep_req in subdeps_data.items():
                sdns, _, sdname = subdep.rpartition('/')
                sdpurl = PackageURL(
                    type=cls.default_package_type,
                    namespace=sdns,
                    name=sdname
                ).to_string()
                sub_deps.append(
                    models.DependentPackage(
                        purl=sdpurl,
                        scope=scope,
                        extracted_requirement=subdep_req,
                        is_runtime=is_runtime,
                        is_optional=is_optional,
                        is_resolved=False,
                    )
                )
            resolved_package.dependencies = sub_deps
            dependency.resolved_package = resolved_package.to_dict()
            dependencies.append(dependency)

        root_package_data.dependencies = dependencies

        yield root_package_data
Ejemplo n.º 6
0
def get_file_references(files):
    """
    Build and return a list with one FileReference for each path of the
    ``files`` list of gem file paths. A missing or empty ``files`` gives an
    empty list.
    """
    references = []
    for gem_path in files or []:
        references.append(models.FileReference(gem_path))
    return references
Ejemplo n.º 7
0
def get_installed_windows_programs_from_regtree(
    registry_tree,
    datasource_id,
    package_type,
):
    """
    Yield one PackageData per entry of a Windows ``registry_tree``, built from
    the registry values of that entry.

    The display name and version become the package name and version, the
    publisher becomes an organization party, and the install location, display
    icon and uninstall string (in this order, when present) become file
    references. An empty or missing ``registry_tree`` yields nothing.
    """
    if not registry_tree:
        return

    # registry value name -> name of the field collected for the package
    regkey_to_field = dict(
        DisplayName='name',
        DisplayVersion='version',
        URLInfoAbout='homepage_url',
        Publisher='publisher',
        DisplayIcon='display_icon',
        UninstallString='uninstall_string',
        InstallLocation='install_location',
    )
    # collected fields reported as file references, in reporting order
    reference_fields = ('install_location', 'display_icon', 'uninstall_string')

    for regentry in registry_tree:
        collected = {}
        for regvalue in regentry.get('values', []):
            value_name = regvalue.get('name')
            value_data = regvalue.get('value')
            field = regkey_to_field.get(value_name)
            if not field:
                # not a registry value we track
                continue
            collected[field] = value_data

        publisher = collected.get('publisher')
        if publisher:
            publisher_party = models.Party(
                type=models.party_org,
                role='publisher',
                name=publisher,
            )
            parties = [publisher_party]
        else:
            parties = []

        file_references = [
            models.FileReference(path=collected[field])
            for field in reference_fields
            if collected.get(field)
        ]

        yield models.PackageData(
            datasource_id=datasource_id,
            type=package_type,
            name=collected.get('name'),
            version=collected.get('version'),
            parties=parties,
            homepage_url=collected.get('homepage_url'),
            file_references=file_references,
        )