def author_mapper(authors_content, package):
    """
    Append one person Party to ``package.parties`` for each
    (name, role, email, url) entry that parse_person() yields from
    ``authors_content`` and return the same ``package``.

    An entry without a role gets the 'author' role.
    https://getcomposer.org/doc/04-schema.md#authors
    """
    for person_name, person_role, person_email, person_url in parse_person(authors_content):
        party = models.Party(
            type=models.party_person,
            name=person_name,
            role=person_role or 'author',
            email=person_email,
            url=person_url,
        )
        package.parties.append(party)
    return package
Beispiel #2
0
def m_maintainer_handler(value, **kwargs):
    """
    Return a Package data mapping with a single 'parties' key whose value is a
    list of maintainer Party objects, one for each (name, email) pair that
    get_maintainers() extracts from ``value``.

    Extra keyword arguments are accepted and ignored.
    """
    return {
        'parties': [
            models.Party(
                type='person',
                role='maintainer',
                name=maintainer_name,
                email=maintainer_email,
            )
            for maintainer_name, maintainer_email in get_maintainers(value)
        ]
    }
Beispiel #3
0
def build_package(package_data):
    """
    Build and return a HaxePackage from a ``package_data`` mapping, such as the
    parsed content of a haxelib.json file. For example:
    {
        "name": "haxelib",
        "url" : "https://lib.haxe.org/documentation/",
        "license": "GPL",
        "tags": ["haxelib", "core"],
        "description": "The haxelib client",
        "classPath": "src",
        "version": "3.4.0",
        "releasenote": " * Fix password input issue in Windows (#421).\n * ....",
        "contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
    }

    Each entry of "contributors" becomes a 'contributor' person Party and each
    item of the "dependencies" mapping becomes a DependentPackage.
    """
    get_field = package_data.get

    package = HaxePackage(
        name=get_field('name'),
        version=get_field('version'),
        homepage_url=get_field('url'),
        declared_license=get_field('license'),
        keywords=get_field('tags'),
        description=get_field('description'),
    )

    package.download_url = package.repository_download_url()

    for contributor in get_field('contributors', []):
        profile_url = 'https://lib.haxe.org/u/{}'.format(contributor)
        package.parties.append(
            models.Party(
                type=models.party_person,
                name=contributor,
                role='contributor',
                url=profile_url,
            )
        )

    dependencies = get_field('dependencies', {})
    for dependency_name, raw_version in dependencies.items():
        # An empty or whitespace-only version means the dependency is unpinned.
        pinned_version = raw_version and raw_version.strip()
        purl = PackageURL(
            type='haxe',
            name=dependency_name,
            version=pinned_version,
        )
        package.dependencies.append(
            models.DependentPackage(
                purl=purl.to_string(),
                is_resolved=bool(pinned_version),
            )
        )

    return package
Beispiel #4
0
def parse_setup_py(location):
    """
    Return a PythonPackage built from the text of the setup.py file at
    ``location``. Return None if ``location`` is empty or does not end with
    'setup.py'.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    mode = 'rb' if py2 else 'r'
    with open(location, mode) as setup_file:
        setup_text = setup_file.read()

    summary = get_setup_attribute(setup_text, 'summary')
    long_description = get_setup_attribute(setup_text, 'description')
    description = build_description(summary, long_description)

    author = get_setup_attribute(setup_text, 'author')
    parties = []
    if author:
        author_party = models.Party(
            type=models.party_person,
            name=author,
            role='author',
        )
        parties.append(author_party)

    # The declared license keeps both the "license" argument and the
    # "License ::" classifiers, in that order.
    declared_license = OrderedDict()
    declared_license['license'] = get_setup_attribute(setup_text, 'license')

    classifiers = get_classifiers(setup_text)
    declared_license['classifiers'] = [
        classifier for classifier in classifiers
        if classifier.startswith('License')
    ]
    keywords = [
        classifier for classifier in classifiers
        if not classifier.startswith('License')
    ]

    return PythonPackage(
        name=get_setup_attribute(setup_text, 'name'),
        version=get_setup_attribute(setup_text, 'version'),
        description=description or None,
        homepage_url=get_setup_attribute(setup_text, 'url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=keywords,
    )
Beispiel #5
0
def parse_pkg_info(location):
    """
    Return a PythonPackage built from the 'PKG-INFO' file at ``location``.
    Return None if ``location`` is empty or does not end with 'PKG-INFO'.

    Each name in PKG_INFO_ATTRIBUTES is searched as a field at the start of a
    line. The field value is the text that follows the name and its separating
    colon(s) and whitespace. A field that is absent, or whose value is
    'UNKNOWN', is recorded as None.
    """
    if not location or not location.endswith('PKG-INFO'):
        return

    # Decode explicitly: searching a text pattern in raw bytes raises a
    # TypeError on Python 3.
    with open(location, 'rb') as inp:
        pkg_info = inp.read().decode('utf-8', 'replace')

    infos = {}
    for attribute in PKG_INFO_ATTRIBUTES:
        # (?![\w-]) stops a short field name from matching a longer one that
        # starts with it, such as "Author" matching an "Author-email" line.
        pattern = r'^' + re.escape(attribute) + r'(?![\w-])[\s:]*(.*)'
        match = re.search(pattern, pkg_info, flags=re.MULTILINE)
        value = match.group(1).strip() if match else None
        if value == 'UNKNOWN':
            value = None
        infos[attribute] = value

    description = build_description(infos.get('Summary'),
                                    infos.get('Description'))

    parties = []
    author = infos.get('Author')
    if author:
        parties.append(
            models.Party(type=models.party_person, name=author, role=''))

    package = PythonPackage(
        name=infos.get('Name'),
        version=infos.get('Version'),
        description=description or None,
        homepage_url=infos.get('Home-page') or None,
        # FIXME: this is NOT correct as classifiers can be used for this too
        declared_license=infos.get('License') or None,
        # FIXME: what about email?
        # FIXME: what about maintainers?
        parties=parties,
    )
    return package
Beispiel #6
0
def parse(location):
    """
    Return a Package built from parsing a file at 'location'.
    The file name can be either a 'setup.py', 'metadata.json' or 'PKG-INFO' file.
    Return None for any other file name.
    """
    file_name = fileutils.file_name(location)
    if file_name == 'setup.py':
        return PythonPackage(
            name=get_attribute(location, 'name'),
            homepage_url=get_attribute(location, 'url'),
            description=get_attribute(location, 'description'),
            version=get_attribute(location, 'version'),
            authors=[models.Party(type=models.party_person, name=get_attribute(location, 'author'))],
            asserted_licenses=[AssertedLicense(license=get_attribute(location, 'license'))],
        )
    # The parsed packages of these two branches must be returned to the
    # caller: they were previously computed and then discarded.
    if file_name == 'metadata.json':
        return parse_metadata(location)
    if file_name == 'PKG-INFO':
        return parse_pkg_info(location)
    return None
    def recognize(cls, location):
        """
        Yield a single package object built with ``cls`` from the YAML data of
        the file at ``location``. Yield nothing when the data has no 'name'.
        """
        with io.open(location, encoding='utf-8') as about_file:
            about_data = saneyaml.load(about_file.read())

        name = about_data.get('name')
        # FIXME: having no name may not be a problem See #1514
        if not name:
            return

        # Any owner that is not a string, including a missing one, is stored
        # as its repr().
        owner = about_data.get('owner')
        owner_name = owner if isinstance(owner, str) else repr(owner)
        owner_party = models.Party(
            type=models.party_person,
            name=owner_name,
            role='owner',
        )

        about_package = cls(
            type='about',
            name=name,
            version=about_data.get('version'),
            declared_license=about_data.get('license_expression'),
            copyright=about_data.get('copyright'),
            parties=[owner_party],
            homepage_url=(
                about_data.get('home_url') or about_data.get('homepage_url')
            ),
            download_url=about_data.get('download_url'),
        )

        about_package.extra_data['about_resource'] = about_data.get('about_resource')
        yield about_package
Beispiel #8
0
 def test_MetadataBzl_parse(self):
     # Parse the METADATA.bzl fixture and compare the result with the single
     # package that is expected from it.
     handler = build.BuckMetadataBzlHandler
     test_file = self.get_test_loc('metadatabzl/METADATA.bzl')
     result_packages = handler.parse(test_file)

     maintainer = models.Party(
         type=models.party_org,
         name='oss_foundation',
         role='maintainer',
     )
     expected_package = models.PackageData(
         datasource_id=handler.datasource_id,
         type='github',
         name='example',
         version='0.0.1',
         declared_license=['BSD-3-Clause'],
         parties=[maintainer],
         homepage_url='https://github.com/example/example',
     )
     compare_package_results([expected_package], result_packages)
Beispiel #9
0
def create_package_data_from_msiinfo_results(
    msiinfo_results,
    datasource_id='msi_installer',
    package_type='msi',
):
    """
    Return a PackageData built from a ``msiinfo_results`` mapping.

    The 'Author', 'Subject', 'Comments' and 'Keywords' entries are popped from
    ``msiinfo_results`` (the mapping is modified in place) and what remains is
    stored as the package extra_data.
    """
    take = msiinfo_results.pop

    author_name = take('Author', '')
    # Currently, we use the contents `Subject` field from the msiinfo suminfo
    # results as the package name because it contains the package name most of
    # the time. Getting the version out of the `Subject` string is not
    # straightforward because the format of the string is usually different
    # between different MSIs
    subject = take('Subject', '')
    version = get_version_from_subject_line(subject)
    description = take('Comments', '')
    keywords = take('Keywords', [])

    if author_name:
        parties = [models.Party(type=None, role='author', name=author_name)]
    else:
        parties = []

    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        name=subject,
        version=version,
        description=description,
        parties=parties,
        keywords=keywords,
        extra_data=msiinfo_results,
    )
    def recognize(cls, location):
        """
        Yield a single package object built with ``cls`` from the XML document
        at ``location``. Yield nothing when the parsed document is empty.
        """
        with open(location, 'rb') as manifest:
            parsed = xmltodict.parse(manifest)

        if TRACE:
            logger_debug('parsed:', parsed)
        if not parsed:
            return

        assembly = parsed.get('assembly', {})
        identity = assembly.get('assemblyIdentity', {})

        # The company, when present, is reported as the owning organization.
        company = assembly.get('@company', '')
        owners = []
        if company:
            owners.append(
                models.Party(name=company, type=models.party_org, role='owner')
            )

        yield cls(
            name=identity.get('@name', ''),
            version=identity.get('@version', ''),
            description=assembly.get('@description', ''),
            homepage_url=assembly.get('@supportInformation', ''),
            parties=owners,
            copyright=assembly.get('@copyright', ''),
        )
Beispiel #11
0
def parse_metadata(location):
    """
    Return a Package object from the Python wheel 'metadata.json' file at 'location'
    or None. Return None unless the parent directory of 'location' contains both a
    'METADATA' and a 'DESCRIPTION.rst' file.

    The homepage URL and the authors are read from the 'python.details' entry
    of the 'extensions' mapping. Any missing or malformed level of that
    structure is treated as empty rather than raising.
    """
    if not location or not location.endswith('metadata.json'):
        return
    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    if not all(os.path.exists(os.path.join(parent_dir, fname))
               for fname in ('METADATA', 'DESCRIPTION.rst')):
        return

    # Close the file deterministically instead of leaking the handle.
    with open(location, 'rb') as inp:
        infos = json.load(inp)

    def as_mapping(value):
        # Treat anything that is not a mapping as an empty mapping.
        return value if isinstance(value, dict) else {}

    extensions = as_mapping(infos.get('extensions'))
    details = as_mapping(extensions.get('python.details'))
    homepage_url = as_mapping(details.get('project_urls')).get('Home')

    authors = []
    contacts = details.get('contacts')
    if isinstance(contacts, (list, tuple)):
        for contact in contacts:
            # Skip a contact without a name and keep processing the others.
            if isinstance(contact, dict) and 'name' in contact:
                authors.append(
                    models.Party(type=models.party_person, name=contact['name'],))

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        summary=infos.get('summary'),
        asserted_licenses=[AssertedLicense(license=infos.get('license'))],
        homepage_url=homepage_url,
        authors=authors,
    )
    return package
Beispiel #12
0
def parse_setup_py(location):
    """
    Return a PythonPackage built from the content of the setup.py file at
    ``location``. Return None if ``location`` is empty or does not end with
    'setup.py'.

    The declared license is the "license" argument followed by every
    "License" classifier, one per line, skipping empty values.
    """
    if not (location and location.endswith('setup.py')):
        return

    # FIXME: what if this is unicode text?
    with open(location, 'rb') as setup_file:
        setup_text = setup_file.read()

    summary = get_setup_attribute(setup_text, 'summary')
    long_description = get_setup_attribute(setup_text, 'description')
    description = build_description(summary, long_description)

    author = get_setup_attribute(setup_text, 'author')
    parties = []
    if author:
        author_party = models.Party(
            type=models.party_person,
            name=author,
            role='author',
        )
        parties.append(author_party)

    classifiers = get_classifiers(setup_text)
    license_classifiers = [
        classifier for classifier in classifiers
        if classifier.startswith('License')
    ]
    keywords = [
        classifier for classifier in classifiers
        if not classifier.startswith('License')
    ]

    license_texts = [get_setup_attribute(setup_text, 'license')]
    license_texts = license_texts + license_classifiers
    declared_license = '\n'.join(
        text for text in license_texts if text and text.strip()
    )

    return PythonPackage(
        name=get_setup_attribute(setup_text, 'name'),
        version=get_setup_attribute(setup_text, 'version'),
        description=description or None,
        homepage_url=get_setup_attribute(setup_text, 'url') or None,
        parties=parties,
        declared_license=declared_license,
        keywords=keywords,
    )
Beispiel #13
0
def parse(location):
    """
    Return a MicrosoftUpdateManifestPackage from a .mum XML file at `location`.
    Return None when parse_mum() returns nothing for this `location`.
    """
    parsed = parse_mum(location)
    if TRACE:
        logger_debug('parsed:', parsed)
    if not parsed:
        return

    assembly = parsed.get('assembly', {})
    identity = assembly.get('assemblyIdentity', {})

    # The company, when present, is reported as the owning organization.
    company = assembly.get('@company', '')
    owners = []
    if company:
        owners.append(
            models.Party(name=company, type=models.party_org, role='owner')
        )

    return MicrosoftUpdateManifestPackage(
        name=identity.get('@name', ''),
        version=identity.get('@version', ''),
        description=assembly.get('@description', ''),
        homepage_url=assembly.get('@supportInformation', ''),
        parties=owners,
        copyright=assembly.get('@copyright', ''),
    )
Beispiel #14
0
def parse_with_pkginfo(pkginfo):
    """
    Return a PythonPackage built from the attributes of a ``pkginfo`` metadata
    object. Return None if ``pkginfo`` is empty or has no name.

    The license, when present, is stored as a {'license': ...} mapping and the
    maintainer, when present, is attached to the package as a person Party.
    """
    if not pkginfo or not pkginfo.name:
        return None

    package = PythonPackage(
        name=pkginfo.name,
        version=pkginfo.version,
        description=pkginfo.description,
        download_url=pkginfo.download_url,
        homepage_url=pkginfo.home_page,
    )

    if pkginfo.license:
        # TODO: We should make the declared license as it is, this should be updated in scancode to parse a pure string
        package.declared_license = {'license': pkginfo.license}

    if pkginfo.maintainer:
        # Attach the party to the package itself. It was previously added to
        # the constructor arguments after the package had been created, so it
        # never reached the returned package.
        # NOTE(review): the maintainer is recorded with the 'author' role;
        # confirm whether 'maintainer' is intended.
        package.parties = [
            models.Party(type=models.party_person,
                         name=pkginfo.maintainer,
                         role='author',
                         email=pkginfo.maintainer_email),
        ]
    return package
Beispiel #15
0
def parse_with_pkginfo(pkginfo):
    """
    Return a PythonPackage built from the attributes of a ``pkginfo`` metadata
    object. Return None if ``pkginfo`` is empty or has no name.

    The description falls back to the summary. "License" classifiers go to the
    declared license and the other classifiers become keywords. An author
    Party is added only when an author email is present.
    """
    if not (pkginfo and pkginfo.name):
        return

    package = PythonPackage(
        name=pkginfo.name,
        version=pkginfo.version,
        description=pkginfo.description or pkginfo.summary,
        download_url=pkginfo.download_url,
        homepage_url=pkginfo.home_page,
    )

    declared_license = {}
    if pkginfo.license:
        # TODO: We should make the declared license as it is, this should be updated in scancode to parse a pure string
        declared_license['license'] = pkginfo.license

    if pkginfo.classifiers:
        license_classifiers = []
        keywords = []
        for classifier in pkginfo.classifiers:
            is_license = classifier.startswith('License')
            bucket = license_classifiers if is_license else keywords
            bucket.append(classifier)
        declared_license['classifiers'] = license_classifiers
        package.keywords = keywords

    if declared_license:
        package.declared_license = declared_license

    if pkginfo.author_email:
        author_party = models.Party(
            type=models.party_person,
            name=pkginfo.author,
            role='author',
            email=pkginfo.author_email,
        )
        package.parties = [author_party]

    return package
Beispiel #16
0
    def parse(cls, location):
        """
        Yield a single PackageData built from the XML document at ``location``.
        Yield nothing when the parsed document is empty.
        """
        with open(location, 'rb') as manifest:
            document = xmltodict.parse(manifest)

        if not document:
            return

        assembly = document.get('assembly', {})
        identity = assembly.get('assemblyIdentity', {})

        # The company, when present, is reported as the owning organization.
        company = assembly.get('@company', '')
        owners = []
        if company:
            owners.append(
                models.Party(name=company, type=models.party_org, role='owner')
            )

        yield models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=identity.get('@name', ''),
            version=identity.get('@version', ''),
            description=assembly.get('@description', ''),
            homepage_url=assembly.get('@supportInformation', ''),
            parties=owners,
            copyright=assembly.get('@copyright', ''),
        )
Beispiel #17
0
def build_package_data(debian_data,
                       datasource_id,
                       package_type='deb',
                       distro=None):
    """
    Return a PackageData object built from a ``debian_data`` mapping (from a
    dpkg status or similar file).

    ``distro`` is used as the namespace of the package and of its source
    package purl. Optional fields are only recorded when they have a value.
    """
    field = debian_data.get

    name = field('package')
    version = field('version')

    architecture = field('architecture')
    qualifiers = {'architecture': architecture} if architecture else {}

    # Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
    # empty/non-present. See https://wiki.debian.org/Multiarch/HOWTO
    multi_arch = field('multi-arch')
    extra_data = {'multi_arch': multi_arch} if multi_arch else {}

    description = field('description')
    homepage_url = field('homepage')
    size = field('installed')

    # The party role is the same as the name of the field it is read from.
    parties = []
    for role in ('maintainer', 'original_maintainer'):
        party_name = field(role)
        if party_name:
            parties.append(models.Party(role=role, name=party_name))

    section = field('section')
    keywords = [section] if section else []

    source_packages = []
    source = field('source')
    if source:
        source_purl = PackageURL(
            type=package_type,
            name=source,
            namespace=distro,
        )
        source_packages.append(source_purl.to_string())

    return models.PackageData(
        datasource_id=datasource_id,
        type=package_type,
        namespace=distro,
        name=name,
        version=version,
        qualifiers=qualifiers,
        description=description,
        homepage_url=homepage_url,
        size=size,
        source_packages=source_packages,
        keywords=keywords,
        parties=parties,
        extra_data=extra_data,
    )
Beispiel #18
0
    def parse(cls, location):
        """
        Yield a single PackageData built from the RPM tags that get_rpm_tags()
        returns for ``location``. Yield nothing when there are no tags.
        """
        rpm_tags = get_rpm_tags(location, include_desc=True)

        if TRACE:
            logger_debug('recognize: rpm_tags', rpm_tags)
        if not rpm_tags:
            return

        name = rpm_tags.name

        # An epoch that is missing, zero or not a valid integer is ignored.
        raw_epoch = rpm_tags.epoch
        try:
            epoch = (int(raw_epoch) or None) if raw_epoch else None
        except ValueError:
            epoch = None

        evr = EVR(
            version=rpm_tags.version or None,
            release=rpm_tags.release or None,
            epoch=epoch,
        ).to_string()

        # NOTE(review): these qualifiers are computed but are not passed to
        # the PackageData created below - confirm whether this is intended.
        qualifiers = {}
        os_name = rpm_tags.os
        if os_name and os_name.lower() != 'linux':
            qualifiers['os'] = os_name

        arch = rpm_tags.arch
        if arch:
            qualifiers['arch'] = arch

        source_packages = []
        if rpm_tags.source_rpm:
            src_epoch, src_name, src_version, src_release, src_arch = nevra.from_name(
                rpm_tags.source_rpm)
            src_evr = EVR(src_version, src_release, src_epoch).to_string()
            src_qualifiers = {'arch': src_arch} if src_arch else {}

            src_purl = models.PackageURL(
                type=cls.default_package_type,
                # TODO: namespace=cls.default_package_namespace,
                name=src_name,
                version=src_evr,
                qualifiers=src_qualifiers,
            ).to_string()

            if TRACE:
                logger_debug('recognize: source_rpm', src_purl)
            source_packages = [src_purl]

        # TODO: also use me to craft a namespace!!!
        # TODO: assign a namespace to Package URL based on distro names.
        # CentOS
        # Fedora Project
        # OpenMandriva Lx
        # openSUSE Tumbleweed
        # Red Hat
        parties = []
        distribution = rpm_tags.distribution
        if distribution:
            parties.append(models.Party(name=distribution, role='distributor'))

        vendor = rpm_tags.vendor
        if vendor:
            parties.append(models.Party(name=vendor, role='vendor'))

        description = build_description(
            summary=rpm_tags.summary,
            description=rpm_tags.description,
        )

        if TRACE:
            data = dict(
                name=name,
                version=evr,
                description=description or None,
                homepage_url=rpm_tags.url or None,
                parties=parties,
                declared_license=rpm_tags.license or None,
                source_packages=source_packages,
            )
            logger_debug('recognize: data to create a package:\n', data)

        package = models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            # TODO: namespace=cls.default_package_namespace,
            name=name,
            version=evr,
            description=description or None,
            homepage_url=rpm_tags.url or None,
            parties=parties,
            declared_license=rpm_tags.license or None,
            source_packages=source_packages,
        )

        if TRACE:
            logger_debug('recognize: created package:\n', package)

        yield package
Beispiel #19
0
    def parse(cls, location):
        """
        Yield one PackageData built from the YAML data of the About file at
        ``location``.

        The package type is the 'type' field, else the type of the 'purl'
        field, else ``cls.default_package_type``. The namespace is the
        'namespace' field, else the namespace of the 'purl' field.
        """
        with io.open(location, encoding='utf-8') as loc:
            package_data = saneyaml.load(loc.read())

        # About files can contain any purl and also have a namespace
        about_type = package_data.get('type')
        about_ns = package_data.get('namespace')
        purl_type = None
        purl_ns = None
        purl = package_data.get('purl')
        if purl:
            purl = PackageURL.from_string(purl)
            if purl:
                purl_type = purl.type
                # The purl namespace was previously never read, so a namespace
                # carried only by the purl was silently dropped.
                purl_ns = purl.namespace

        package_type = about_type or purl_type or cls.default_package_type
        package_ns = about_ns or purl_ns

        name = package_data.get('name')
        version = package_data.get('version')

        homepage_url = package_data.get('home_url') or package_data.get(
            'homepage_url')
        download_url = package_data.get('download_url')
        copyright_statement = package_data.get('copyright')

        license_expression = package_data.get('license_expression')
        declared_license = license_expression

        # Any owner that is not a string, including a missing one, is stored
        # as its repr().
        owner = package_data.get('owner')
        if not isinstance(owner, str):
            owner = repr(owner)
        parties = [
            models.Party(type=models.party_person, name=owner, role='owner')
        ]

        # FIXME: also include notice_file and license_file(s) as file_references
        file_references = []
        about_resource = package_data.get('about_resource')
        if about_resource:
            file_references.append(models.FileReference(path=about_resource))

        # FIXME: we should put the unprocessed attributes in extra data
        yield models.PackageData(
            datasource_id=cls.datasource_id,
            type=package_type,
            namespace=package_ns,
            name=name,
            version=version,
            declared_license=declared_license,
            license_expression=license_expression,
            copyright=copyright_statement,
            parties=parties,
            homepage_url=homepage_url,
            download_url=download_url,
            file_references=file_references,
        )
Beispiel #20
0
def parse_metadata(location):
    """
    Return a Package object from the Python wheel 'metadata.json' file
    at 'location' or None. Check if the parent directory of 'location'
    contains both a 'METADATA' and a 'DESCRIPTION.rst' file to ensure
    this is a proper metadata.json file.

    Contacts with a name become 'contact' person parties. "License"
    classifiers go to the declared license and the other classifiers become
    keywords.
    """
    if not location or not location.endswith('metadata.json'):
        if TRACE: logger_debug('parse_metadata: not metadata.json:', location)
        return
    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    paths = [
        os.path.join(parent_dir, n) for n in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(p) for p in paths):
        if TRACE: logger_debug('parse_metadata: not extra paths', paths)
        return

    with open(location, 'rb') as infs:
        infos = json.load(infs)

    extensions = infos.get('extensions')
    if TRACE: logger_debug('parse_metadata: extensions:', extensions)
    details = extensions and extensions.get('python.details')
    urls = details and details.get('project_urls')
    homepage_url = urls and urls.get('Home')

    parties = []
    contacts = details and details.get('contacts') or []
    # Trace the computed contacts: tracing details.get() directly failed with
    # an AttributeError when there are no details.
    if TRACE:
        logger_debug('parse_metadata: contacts:', contacts)
    for contact in contacts:
        if TRACE: logger_debug('parse_metadata: contact:', contact)
        name = contact and contact.get('name')
        if not name:
            if TRACE: logger_debug('parse_metadata: no name:', contact)
            continue
        parties.append(
            models.Party(type=models.party_person, name=name, role='contact'))

    description = build_description(infos.get('summary'),
                                    infos.get('description'))

    classifiers = infos.get('classifiers')
    license_classifiers = []
    other_classifiers = []
    if classifiers:
        for classifier in classifiers:
            if classifier.startswith('License'):
                license_classifiers.append(classifier)
            else:
                other_classifiers.append(classifier)

    declared_license = {}
    lic = infos.get('license')
    if lic:
        declared_license['license'] = lic
    if license_classifiers:
        declared_license['classifiers'] = license_classifiers

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        description=description or None,
        declared_license=declared_license or None,
        homepage_url=homepage_url or None,
        parties=parties,
        keywords=other_classifiers,
    )
    return package
Beispiel #21
0
def _setup_py_string(node):
    """
    Return the text of the string literal AST ``node``, or None if ``node`` is
    not a string literal.
    """
    if isinstance(node, ast.Constant):
        return node.value if isinstance(node.value, str) else None
    # Older Python versions parse a string literal to an ast.Str node. Compare
    # by class name to avoid touching the deprecated ast.Str attribute.
    if type(node).__name__ == 'Str':
        return node.s
    return None


def parse_setup_py(location):
    """
    Return a PythonPackage built from setup.py data.

    The file at ``location`` is parsed (not executed) and the keyword
    arguments of top-level calls to `setup` or `main` are collected when they
    are string literals or lists, tuples or sets of string literals.

    Return None if ``location`` is empty, does not end with 'setup.py' or if
    no package name is found.
    """
    if not location or not location.endswith('setup.py'):
        return

    with open(location) as inp:
        setup_text = inp.read()

    setup_args = {}

    # Parse setup.py file and traverse the AST
    tree = ast.parse(setup_text)
    for statement in tree.body:
        # We only care about function calls or assignments to functions named
        # `setup` or `main`
        if (isinstance(statement, (ast.Expr, ast.Assign))
                and isinstance(statement.value, ast.Call)
                and isinstance(statement.value.func, ast.Name)
                # we also look for main as sometimes this is used instead of setup()
                and statement.value.func.id in ('setup', 'main')):

            # Process the arguments to the setup function
            for kw in getattr(statement.value, 'keywords', []):
                arg_name = kw.arg

                text = _setup_py_string(kw.value)
                if text is not None:
                    setup_args[arg_name] = text

                elif isinstance(kw.value, (ast.List, ast.Tuple, ast.Set)):
                    # Keep only the string literal elements. Other elements
                    # such as names, calls or numbers have no usable text and
                    # are skipped instead of raising an AttributeError.
                    elements = (_setup_py_string(elt) for elt in kw.value.elts)
                    setup_args[arg_name] = [
                        element for element in elements if element is not None
                    ]

                # TODO:  what if isinstance(kw.value, ast.Dict)
                # or an expression like a call to version=get_version or version__version__

    package_name = setup_args.get('name')
    if not package_name:
        return

    description = build_description(
        setup_args.get('summary', ''),
        setup_args.get('description', ''),
    )

    parties = []
    author = setup_args.get('author')
    author_email = setup_args.get('author_email')
    homepage_url = setup_args.get('url')
    if author:
        parties.append(
            models.Party(type=models.party_person,
                         name=author,
                         email=author_email,
                         role='author',
                         url=homepage_url))
    elif author_email:
        parties.append(
            models.Party(type=models.party_person,
                         email=author_email,
                         role='author',
                         url=homepage_url))

    declared_license = {}
    license_setuptext = setup_args.get('license')
    declared_license['license'] = license_setuptext

    classifiers = setup_args.get('classifiers', [])
    license_classifiers = [c for c in classifiers if c.startswith('License')]
    declared_license['classifiers'] = license_classifiers

    other_classifiers = [c for c in classifiers if not c.startswith('License')]

    detected_version = setup_args.get('version')
    if not detected_version:
        # search for possible dunder versions here and elsewhere
        detected_version = detect_version_attribute(location)

    return PythonPackage(
        name=package_name,
        version=detected_version,
        description=description or None,
        homepage_url=homepage_url or None,
        parties=parties,
        declared_license=declared_license,
        keywords=other_classifiers,
    )
Beispiel #22
0
def build_package(package_data):
    """
    Return a cran Package object from a dictionary yaml data.

    ``package_data`` is a mapping from which the ``Package``, ``Version``,
    ``Title``, ``Description``, ``License``, ``Maintainer``, ``Author`` and
    ``Depends`` keys are read. Return None when there is no ``Package`` name.
    """
    name = package_data.get('Package')
    if not name:
        return

    # Maintainers are collected first, then authors. The per-party name and
    # email use their own variables so that they never overwrite the package
    # ``name`` used to build the returned package.
    parties = []
    for field, role in (('Maintainer', 'maintainer'), ('Author', 'author')):
        persons = package_data.get(field)
        if not persons:
            continue
        for person in persons.split(',\n'):
            party_name, party_email = get_party_info(person)
            if party_name or party_email:
                parties.append(
                    models.Party(
                        name=party_name,
                        role=role,
                        email=party_email,
                    )
                )

    package_dependencies = []
    dependencies = package_data.get('Depends')
    if dependencies:
        for dependency in dependencies.split(',\n'):
            requirement = None
            for splitter in ('==', '>=', '<=', '>', '<'):
                if splitter in dependency:
                    splits = dependency.split(splitter)
                    # Replace the package name and keep the relationship and version
                    # For example: R (>= 2.1)
                    requirement = dependency.replace(splits[0], '').strip().strip(')').strip()
                    dependency = splits[0].strip().strip('(').strip()
                    break
            package_dependencies.append(
                models.DependentPackage(
                    purl=PackageURL(
                        type='cran', name=dependency).to_string(),
                    requirement=requirement,
                    scope='dependencies',
                    is_runtime=True,
                    is_optional=False,
                )
            )

    return CranPackage(
        name=name,
        version=package_data.get('Version'),
        # fall back to the title when there is no description
        description=package_data.get('Description', '') or package_data.get('Title', ''),
        declared_license=package_data.get('License'),
        parties=parties,
        dependencies=package_dependencies,
        # TODO: Let's handle the release date as a Date type
        # release_date = package_data.get('Date/Publication'),
    )
Beispiel #23
0
def build_opam_package(opams):
    """
    Build and return an OpamPackage from ``opams``, a mapping of the fields
    of an opam file.
    """
    package_dependencies = [
        models.DependentPackage(
            purl=dep.purl,
            requirement=dep.version,
            scope='dependency',
            is_runtime=True,
            is_optional=False,
            is_resolved=False,
        )
        for dep in (opams.get('depends') or [])
    ]

    # Combine the synopsis and the description on separate lines, ignoring
    # blank values and a description that only repeats the synopsis.
    synopsis = opams.get('synopsis') or ''
    details = opams.get('description') or ''
    if details == synopsis:
        details = None
    description = '\n'.join(
        text for text in (synopsis, details) if text and text.strip()
    )

    # Authors are recorded by name and maintainers by email.
    parties = [
        models.Party(
            type=models.party_person,
            name=author,
            role='author'
        )
        for author in (opams.get('authors') or [])
    ]
    parties.extend(
        models.Party(
            type=models.party_person,
            email=maintainer,
            role='maintainer'
        )
        for maintainer in (opams.get('maintainer') or [])
    )

    return OpamPackage(
        name=opams.get('name'),
        version=opams.get('version'),
        vcs_url=opams.get('dev-repo'),
        homepage_url=opams.get('homepage'),
        download_url=opams.get('src'),
        sha1=opams.get('sha1'),
        md5=opams.get('md5'),
        sha256=opams.get('sha256'),
        sha512=opams.get('sha512'),
        bug_tracking_url=opams.get('bug-reports'),
        declared_license=opams.get('license'),
        description=description,
        parties=parties,
        dependencies=package_dependencies
    )
Beispiel #24
0
def build_package(cls, pubspec_data):
    """
    Return a package object built with ``cls`` from a ``pubspec_data``
    package data mapping.

    The ``archive_url`` value is used as the download URL when present.
    Otherwise the download URL is the one computed by the package
    ``repository_download_url()`` method.
    """
    # Author and authors are deprecated
    authors = []
    author = pubspec_data.get('author')
    if author:
        authors.append(author)
    authors.extend(pubspec_data.get('authors') or [])

    parties = [
        models.Party(type=models.party_person, role='author', name=auth)
        for auth in authors
    ]

    # Each entry is a (pubspec section, is_runtime, is_optional) tuple.
    dependency_sections = (
        ('dependencies', True, False),
        ('dev_dependencies', False, True),
        ('environment', True, False),
    )
    package_dependencies = []
    for section, is_runtime, is_optional in dependency_sections:
        package_dependencies.extend(
            collect_deps(
                pubspec_data,
                section,
                is_runtime=is_runtime,
                is_optional=is_optional,
            )
        )

    # Keep these optional fields as extra data, only when they have a value.
    # NOTE(review): the pubspec format seems to name the overrides field
    # ``dependency_overrides``, not ``dependencies_overrides`` -- confirm
    # before changing the key as this changes the collected extra data.
    extra_keys = (
        'issue_tracker',
        'documentation',
        'dependencies_overrides',
        'executables',
        'publish_to',
    )
    extra_data = {}
    for key in extra_keys:
        value = pubspec_data.get(key)
        if value:
            extra_data[key] = value

    package = cls(
        name=pubspec_data.get('name'),
        version=pubspec_data.get('version'),
        vcs_url=pubspec_data.get('repository'),
        description=pubspec_data.get('description'),
        declared_license=pubspec_data.get('license'),
        parties=parties,
        homepage_url=pubspec_data.get('homepage'),
        dependencies=package_dependencies,
        extra_data=extra_data,
    )

    # Use the declared archive URL rather than dropping it, and only fall
    # back to the computed repository download URL when there is none.
    download_url = pubspec_data.get('archive_url')
    package.download_url = download_url or package.repository_download_url()

    return package
Beispiel #25
0
def parse(location):
    """
    Parse the nuspec XML file at `location` and return a Nuget package.
    Return None when the file does not parse or has no package metadata.
    """
    parsed = _parse_nuspec(location)
    if TRACE:
        logger_debug('parsed:', parsed)
    if not parsed:
        return None

    nuspec = (parsed.get('package', {}) or {}).get('metadata')
    if not nuspec:
        return None

    name = nuspec.get('id')

    # Summary: A short description of the package for UI display. If omitted, a
    # truncated version of description is used.
    description = build_description(
        nuspec.get('summary'),
        nuspec.get('description'),
    )

    # title: A human-friendly title of the package, typically used in UI
    # displays as on nuget.org and the Package Manager in Visual Studio. If not
    # specified, the package ID is used. It is only added to the description
    # when it differs from the package id.
    title = nuspec.get('title')
    if title and title != name:
        description = build_description(title, description)

    # The authors and owners values are each kept whole as a single party name.
    parties = []
    for key, role in (('authors', 'author'), ('owners', 'owner')):
        party_name = nuspec.get(key)
        if party_name:
            parties.append(models.Party(name=party_name, role=role))

    # Prefix the repository URL with its type (as "<type>+<url>") when known.
    repo = nuspec.get('repository') or {}
    vcs_tool = repo.get('@type') or ''
    vcs_repository = repo.get('@url') or ''
    if not vcs_repository:
        vcs_url = None
    elif vcs_tool:
        vcs_url = f'{vcs_tool}+{vcs_repository}'
    else:
        vcs_url = vcs_repository

    return NugetPackage(
        name=name,
        version=nuspec.get('version'),
        description=description or None,
        homepage_url=nuspec.get('projectUrl') or None,
        parties=parties,
        declared_license=nuspec.get('licenseUrl') or None,
        copyright=nuspec.get('copyright') or None,
        vcs_url=vcs_url,
    )
Beispiel #26
0
def build_package(package_data, datasource_id):
    """
    Yield a PackageData object for ``datasource_id`` built from a
    ``package_data`` mapping from a metadata.json or similar.

    Text fields that are missing or present with a null value are handled
    as empty strings.
    """
    name = package_data.get('name')
    version = package_data.get('version')

    # ``.get(key, '')`` still returns None for a key present with a null
    # value: use ``or ''`` so that calling ``.strip()`` below is always safe.
    maintainer_name = package_data.get('maintainer') or ''
    maintainer_email = package_data.get('maintainer_email') or ''
    parties = []
    if maintainer_name or maintainer_email:
        parties.append(
            models.Party(
                name=maintainer_name.strip() or None,
                role='maintainer',
                email=maintainer_email.strip() or None,
            ))

    # TODO: combine descriptions as done elsewhere
    description = (
        package_data.get('description')
        or package_data.get('long_description')
        or ''
    )

    lic = package_data.get('license') or ''
    declared_license = None
    license_expression = None
    if lic:
        declared_license = lic.strip()
        if declared_license:
            license_expression = models.compute_normalized_license(
                declared_license)

    code_view_url = package_data.get('source_url') or ''
    bug_tracking_url = package_data.get('issues_url') or ''

    # Merge the two dependency mappings: entries from "depends" win over
    # entries from "dependencies" with the same name.
    deps = dict(package_data.get('dependencies', {}) or {})
    deps.update(package_data.get('depends', {}) or {})

    dependencies = []
    for dependency_name, requirement in deps.items():
        dependencies.append(
            models.DependentPackage(
                purl=PackageURL(type='chef', name=dependency_name).to_string(),
                scope='dependencies',
                extracted_requirement=requirement,
                is_runtime=True,
                is_optional=False,
            ))

    yield models.PackageData(
        datasource_id=datasource_id,
        type=ChefMetadataJsonHandler.default_package_type,
        name=name,
        version=version,
        parties=parties,
        description=description.strip() or None,
        declared_license=declared_license,
        license_expression=license_expression,
        code_view_url=code_view_url.strip() or None,
        bug_tracking_url=bug_tracking_url.strip() or None,
        dependencies=dependencies,
        primary_language='Ruby',
        **get_urls(name, version),
    )
def build_rubygem_package(cls, gem_data, download_url=None, package_url=None):
    """
    Return a Package built with ``cls`` from a Gem `gem_data` mapping or None.
    The `gem_data` can come from a .gemspec or .gem/gem_data.
    Optionally use the provided `download_url` and `package_url` strings.

    The `gem_data` mapping is not modified.
    """
    if not gem_data:
        return

    name = gem_data.get('name')

    # Combine the summary and the description on separate lines, ignoring
    # blank values and a description that only repeats the summary.
    short_desc = gem_data.get('summary') or ''
    long_desc = gem_data.get('description') or ''
    if long_desc == short_desc:
        long_desc = None
    descriptions = [d for d in (short_desc, long_desc) if d and d.strip()]
    description = '\n'.join(descriptions)

    # Since the gem spec doc is not clear https://guides.rubygems.org
    # /specification-reference/#licenseo, we will treat a list of licenses as a
    # conjunction for now (e.g. AND)
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    package_manifest = cls(name=name,
                           description=description,
                           homepage_url=gem_data.get('homepage'),
                           download_url=download_url,
                           declared_license=declared_license)

    # we can have one singular or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # work on a copy: appending the singular author below must not
        # modify the list owned by the caller's gem_data
        authors = list(authors)
    authors.append(gem_data.get('author') or '')
    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_manifest.parties.append(party)

    # TODO: we have a email that is either a string or a list of string

    # date: 2019-01-09 00:00:00.000000000 Z
    # keep only the leading YYYY-MM-DD part
    date = gem_data.get('date')
    if date and len(date) >= 10:
        package_manifest.release_date = date[:10]

    # there are two levels of nesting
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None
    package_manifest.version = version
    package_manifest.set_purl(package_url)

    metadata = gem_data.get('metadata') or {}
    if metadata:
        # The metadata homepage is only used when there is no homepage yet:
        # when both are present, the homepage from gem_data is kept.
        homepage_url = metadata.get('homepage_uri')
        if homepage_url and not package_manifest.homepage_url:
            package_manifest.homepage_url = homepage_url

        package_manifest.bug_tracking_url = metadata.get('bug_tracking_uri')

        source_code_url = metadata.get('source_code_uri')
        if source_code_url:
            package_manifest.code_view_url = source_code_url
            # TODO: infer purl and add purl to package_manifest.source_packages

        # not used for now
        #   "changelog_uri"     => "https://example.com/user/bestgemever/CHANGELOG.md",
        #   "wiki_uri"          => "https://example.com/user/bestgemever/wiki"
        #   "mailing_list_uri"  => "https://groups.example.com/bestgemever",
        #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    # Any platform other than "ruby" is recorded as a "platform" qualifier.
    # NOTE(review): a missing platform is recorded as a None qualifier value
    # -- confirm this is intended.
    platform = gem_data.get('platform')
    if platform != 'ruby':
        qualifiers = dict(platform=platform)
        if not package_manifest.qualifiers:
            package_manifest.qualifiers = {}

        package_manifest.qualifiers.update(qualifiers)

    package_manifest.dependencies = get_dependencies(
        gem_data.get('dependencies'))

    # Fall back to repository-computed URLs when none were found.
    if not package_manifest.download_url:
        package_manifest.download_url = (
            package_manifest.repository_download_url())

    if not package_manifest.homepage_url:
        package_manifest.homepage_url = (
            package_manifest.repository_homepage_url())

    return package_manifest
Beispiel #28
0
def build_rubygem_package_data(gem_data, datasource_id):
    """
    Return a PackageData for ``datasource_id`` built from a Gem ``gem_data``
    mapping or None. The ``gem_data`` can come from a .gemspec or .gem/metadata.

    The ``gem_data`` mapping is not modified.
    """
    if not gem_data:
        return

    metadata = gem_data.get('metadata') or {}

    name = gem_data.get('name')
    # there are two levels of nesting for version:
    version1 = gem_data.get('version') or {}
    version = version1.get('version') or None

    # Any platform other than "ruby" is recorded as a "platform" qualifier.
    platform = gem_data.get('platform')
    if platform != 'ruby':
        qualifiers = dict(platform=platform)
    else:
        qualifiers = {}

    description = build_description(
        summary=gem_data.get('summary'),
        description=gem_data.get('description'),
    )

    # Since the gem spec doc is not clear wrt. to the default being OR or AND
    # we will treat a list of licenses as a conjunction for now (e.g. AND)
    # See https://guides.rubygems.org/specification-reference/#licenseo
    lic = gem_data.get('license')
    licenses = gem_data.get('licenses')
    declared_license = licenses_mapper(lic, licenses)

    # we may have two homepages and one may be wrong.
    # we prefer the one from the metadata
    homepage_url = metadata.get('homepage_uri')
    if not homepage_url:
        homepage_url = gem_data.get('homepage')

    urls = get_urls(name, version, platform)
    dependencies = get_dependencies(gem_data.get('dependencies'))
    file_references = get_file_references(metadata.get('files'))

    package_data = models.PackageData(
        datasource_id=datasource_id,
        type=GemArchiveHandler.default_package_type,
        primary_language=GemArchiveHandler.default_primary_language,
        name=name,
        version=version,
        qualifiers=qualifiers,
        description=description,
        homepage_url=homepage_url,
        declared_license=declared_license,
        bug_tracking_url=metadata.get('bug_tracking_uri'),
        code_view_url=metadata.get('source_code_uri'),
        file_references=file_references,
        dependencies=dependencies,
        **urls,
    )

    # we can have one singular or a plural list of authors
    authors = gem_data.get('authors') or []
    if isinstance(authors, str):
        # or a string of comma-separated authors (in the Rubygems API)
        authors = [a.strip() for a in authors.split(',') if a.strip()]
    else:
        # work on a copy: appending the singular author below must not
        # modify the list owned by the caller's gem_data
        authors = list(authors)
    authors.append(gem_data.get('author') or '')
    for author in authors:
        if author and author.strip():
            party = models.Party(name=author, role='author')
            package_data.parties.append(party)

    # TODO: we have an email that is either a string or a list of string

    # date: 2019-01-09 00:00:00.000000000 Z
    # keep only the leading YYYY-MM-DD part
    date = gem_data.get('date')
    if date and len(date) >= 10:
        package_data.release_date = date[:10]

    # TODO: infer source purl and add purl to package_data.source_packages

    # not used for now
    #   "changelog_uri"     => "https://example.com/user/bestgemever/CHANGELOG.md",
    #   "wiki_uri"          => "https://example.com/user/bestgemever/wiki"
    #   "mailing_list_uri"  => "https://groups.example.com/bestgemever",
    #   "documentation_uri" => "https://www.example.info/gems/bestgemever/0.0.1",

    if not package_data.homepage_url:
        package_data.homepage_url = rubygems_homepage_url(name, version)

    if not package_data.license_expression and package_data.declared_license:
        package_data.license_expression = models.compute_normalized_license(
            package_data.declared_license)

    return package_data
Beispiel #29
0
def build_xcode_package(podspec_json_data):
    """
    Return a Package object from a podspec.json package data mapping.

    A mapping ``source`` provides the VCS URL from its ``git`` entry or else
    the download URL from its ``http`` entry. A ``source`` that is not a
    mapping is used as-is as the VCS URL.
    """
    name = podspec_json_data.get('name')
    version = podspec_json_data.get('version')
    # ``or ''`` also covers keys that are present with a null value, so that
    # the string methods used below are always safe to call
    summary = podspec_json_data.get('summary') or ''
    description = podspec_json_data.get('description') or ''
    homepage_url = podspec_json_data.get('homepage')

    # a mapping license is flattened to its space-separated values
    lic = podspec_json_data.get('license')
    if isinstance(lic, dict):
        declared_license = ' '.join(lic.values())
    else:
        declared_license = lic

    source = podspec_json_data.get('source')
    vcs_url = None
    download_url = None

    if isinstance(source, dict):
        git_url = source.get('git', '')
        http_url = source.get('http', '')
        if git_url:
            vcs_url = git_url
        elif http_url:
            download_url = http_url
        # a mapping without a "git" entry has no VCS URL: the mapping itself
        # is never used as a URL
    else:
        vcs_url = source

    authors = podspec_json_data.get('authors') or {}

    license_matches = get_license_matches(query_string=declared_license)
    if not license_matches:
        license_expression = 'unknown'
    else:
        license_expression = get_license_expression_from_matches(
            license_matches)

    # prefix the description with the summary unless it already starts with it
    if summary and not description.startswith(summary):
        desc = [summary]
        if description:
            desc += [description]
        description = '. '.join(desc)

    parties = []
    if authors:
        if isinstance(authors, dict):
            for key, value in authors.items():
                # NOTE(review): the author value is used as a URL with a
                # ".com" suffix appended -- confirm this is intended.
                party = models.Party(type=models.party_org,
                                     name=key,
                                     url=value + '.com',
                                     role='owner')
                parties.append(party)
        else:
            party = models.Party(type=models.party_org,
                                 name=authors,
                                 role='owner')
            parties.append(party)

    # NOTE(review): an "extra_data" mapping (source, dependencies and the
    # whole podspec.json data) used to be built here but was never attached to
    # the package, and building it raised a KeyError without a "source".
    # Confirm whether it should be passed to the package instead.

    package = CocoapodsPackage(
        name=name,
        version=version,
        vcs_url=vcs_url,
        description=description,
        declared_license=declared_license,
        license_expression=license_expression,
        homepage_url=homepage_url,
        download_url=download_url,
        parties=parties,
    )

    package.api_data_url = package.get_api_data_url()

    return package
Beispiel #30
0
    def parse(cls, location):
        """
        Yield one or more Package manifest objects given a file ``location`` pointing to a
        package_data archive, manifest or similar.

        The file is read as UTF-8 JSON. Missing or null ``contributors`` and
        ``dependencies`` are treated as empty.

        {
            "name": "haxelib",
            "url" : "https://lib.haxe.org/documentation/",
            "license": "GPL",
            "tags": ["haxelib", "core"],
            "description": "The haxelib client",
            "classPath": "src",
            "version": "3.4.0",
            "releasenote": " * Fix password input issue in Windows (#421).\n * ....",
            "contributors": ["back2dos", "ncannasse", "jason", "Simn", "nadako", "andyli"]
        }
        """
        with io.open(location, encoding='utf-8') as loc:
            json_data = json.load(loc)

        name = json_data.get('name')
        version = json_data.get('version')

        package_data = models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=name,
            version=version,
            homepage_url=json_data.get('url'),
            declared_license=json_data.get('license'),
            keywords=json_data.get('tags'),
            description=json_data.get('description'),
            primary_language=cls.default_primary_language,
        )

        if not package_data.license_expression and package_data.declared_license:
            package_data.license_expression = cls.compute_normalized_license(package_data)

        # the download URL needs both a name and a version
        if name and version:
            download_url = f'https://lib.haxe.org/p/{name}/{version}/download/'
            package_data.repository_download_url = download_url
            package_data.download_url = download_url

        if name:
            package_data.repository_homepage_url = f'https://lib.haxe.org/p/{name}'

        # ``or []`` also covers a "contributors" key present with a null value
        for contrib in json_data.get('contributors') or []:
            party = models.Party(
                type=models.party_person,
                name=contrib,
                role='contributor',
                url=f'https://lib.haxe.org/u/{contrib}')
            package_data.parties.append(party)

        # ``or {}`` also covers a "dependencies" key present with a null value
        for dep_name, dep_version in (json_data.get('dependencies') or {}).items():
            dep_version = dep_version and dep_version.strip()
            # a dependency is resolved only when it has a non-empty version
            is_resolved = bool(dep_version)
            dep_purl = PackageURL(
                type=cls.default_package_type,
                name=dep_name,
                version=dep_version
            ).to_string()
            dep = models.DependentPackage(purl=dep_purl, is_resolved=is_resolved,)
            package_data.dependencies.append(dep)

        yield package_data