コード例 #1
0
    def _get_lgpl_doc(self, or_later=False):
        """Return a small SPDX 2.1 document whose only file is LGPL-2.1 licensed.

        The document gets a 'ScanCode' tool creator, a creation timestamp and
        a single package ('some/path') holding a single file. The file license
        is 'LGPL-2.1+' when ``or_later`` is true, 'LGPL-2.1' otherwise, and the
        same license is recorded on the package as a license found in files.
        """
        lgpl_id = 'LGPL-2.1+' if or_later else 'LGPL-2.1'
        lgpl = License.from_identifier(lgpl_id)

        document = Document(
            Version(2, 1),
            License.from_identifier('CC0-1.0'),
            'Sample_Document-V2.1',
            spdx_id='SPDXRef-DOCUMENT',
            namespace=
            'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301'
        )
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        # Package-level fields: nothing is asserted about its licensing.
        pkg = Package(name='some/path', download_location=NoAssert())
        document.package = pkg
        pkg.cr_text = 'Some copyrught'
        pkg.verif_code = 'SOME code'
        pkg.license_declared = NoAssert()
        pkg.conc_lics = NoAssert()

        # The single file carried by the package.
        spdx_file = File('./some/path/tofile')
        spdx_file.name = './some/path/tofile'
        spdx_file.spdx_id = 'SPDXRef-File'
        spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
        spdx_file.conc_lics = NoAssert()
        spdx_file.copyright = NoAssert()
        spdx_file.add_lics(lgpl)

        pkg.add_lics_from_file(lgpl)
        pkg.add_file(spdx_file)
        return document
コード例 #2
0
    def test_document_is_valid_when_using_or_later_licenses(self):
        """A document whose file uses 'LGPL-2.1+' validates without messages."""
        or_later = License.from_identifier('LGPL-2.1+')

        document = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        pkg = Package(name='some/path', download_location=NoAssert())
        document.package = pkg
        pkg.cr_text = 'Some copyrught'
        pkg.verif_code = 'SOME code'
        pkg.license_declared = NoAssert()
        pkg.conc_lics = NoAssert()

        spdx_file = File('./some/path/tofile')
        spdx_file.name = './some/path/tofile'
        spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
        spdx_file.conc_lics = NoAssert()
        spdx_file.copyright = NoAssert()
        spdx_file.add_lics(or_later)

        pkg.add_lics_from_file(or_later)
        pkg.add_file(spdx_file)

        # validate() reports validity as its return value and appends any
        # problems to the list it is given.
        collected = []
        assert document.validate(collected)
        assert not collected
コード例 #3
0
 def test_document_validate_failures_returns_informative_messages(self):
     """An incomplete document yields the full list of validation messages."""
     document = Document(
         Version(2, 1),
         License.from_identifier('CC0-1.0'),
         'Sample_Document-V2.1',
         spdx_id='SPDXRef-DOCUMENT',
         namespace=
         'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301'
     )
     pkg = Package('some/path', NoAssert())
     document.package = pkg

     # The file is built and licensed but deliberately never added to the
     # package, and no creation info is set on the document.
     spdx_file = File('./some/path/tofile')
     spdx_file.name = './some/path/tofile'
     spdx_file.spdx_id = 'SPDXRef-File'
     spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
     lgpl = License.from_identifier('LGPL-2.1-only')
     spdx_file.add_lics(lgpl)
     pkg.add_lics_from_file(lgpl)

     reported = document.validate([])
     expected = [
         'No creators defined, must have at least one.',
         'Creation info missing created date.',
         'Package checksum must be instance of spdx.checksum.Algorithm',
         'Package download_location can not be None.',
         'Package verif_code can not be None.',
         'Package cr_text can not be None.',
         'Package must have at least one file.',
         'Package concluded license must be instance of spdx.utils.SPDXNone '
         'or spdx.utils.NoAssert or spdx.document.License',
         'Package declared license must be instance of spdx.utils.SPDXNone '
         'or spdx.utils.NoAssert or spdx.document.License'
     ]
     assert expected == reported
コード例 #4
0
    def test_document_is_valid_when_using_or_later_licenses(self):
        """A document using 'LGPL-2.1-or-later' validates with no messages."""
        or_later = License.from_identifier('LGPL-2.1-or-later')

        document = Document(
            Version(2, 1),
            License.from_identifier('CC0-1.0'),
            'Sample_Document-V2.1',
            spdx_id='SPDXRef-DOCUMENT',
            namespace=
            'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301'
        )
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        pkg = Package(name='some/path', download_location=NoAssert())
        document.package = pkg
        pkg.spdx_id = 'SPDXRef-Package'
        pkg.cr_text = 'Some copyrught'
        pkg.verif_code = 'SOME code'
        pkg.license_declared = NoAssert()
        pkg.conc_lics = NoAssert()

        spdx_file = File('./some/path/tofile')
        spdx_file.name = './some/path/tofile'
        spdx_file.spdx_id = 'SPDXRef-File'
        spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
        spdx_file.conc_lics = NoAssert()
        spdx_file.copyright = NoAssert()
        spdx_file.add_lics(or_later)

        pkg.add_lics_from_file(or_later)
        pkg.add_file(spdx_file)

        # Here validate() hands back the message collection itself; it must
        # be falsy for a valid document.
        outcome = document.validate(ErrorMessages())
        assert not outcome
コード例 #5
0
    def _get_lgpl_doc(self, or_later=False):
        """Return a small SPDX 2.1 document whose only file is LGPL-2.1 licensed.

        The file license is 'LGPL-2.1+' when ``or_later`` is true and
        'LGPL-2.1' otherwise; the same license is recorded on the package as
        a license found in its files.
        """
        lgpl_id = 'LGPL-2.1+' if or_later else 'LGPL-2.1'
        lgpl = License.from_identifier(lgpl_id)

        document = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        pkg = Package(name='some/path', download_location=NoAssert())
        document.package = pkg
        pkg.cr_text = 'Some copyrught'
        pkg.verif_code = 'SOME code'
        pkg.license_declared = NoAssert()
        pkg.conc_lics = NoAssert()

        spdx_file = File('./some/path/tofile')
        spdx_file.name = './some/path/tofile'
        spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
        spdx_file.conc_lics = NoAssert()
        spdx_file.copyright = NoAssert()
        spdx_file.add_lics(lgpl)

        pkg.add_lics_from_file(lgpl)
        pkg.add_file(spdx_file)
        return document
コード例 #6
0
def test_generate_file_report_exception(fake_repository):
    """Check that a license exception is reported alongside its license."""
    report_info = FileReport.generate(Project(fake_repository),
                                      "src/exception.py")
    spdxfile = report_info.file_report.spdxfile

    expected_licenses = {
        License.from_identifier("GPL-3.0-or-later"),
        License.from_identifier("Autoconf-exception-3.0"),
    }
    # Order of detection is irrelevant here, hence the set comparison.
    assert set(spdxfile.licenses_in_file) == expected_licenses
    assert spdxfile.copyright == "2017 Mary Sue"
    assert not report_info.bad_licenses
    assert not report_info.missing_licenses
コード例 #7
0
 def test_document_validate_failures_returns_informative_messages(self):
     """A document without creators is invalid and says so in one message."""
     document = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
     pkg = Package('some/path', NoAssert())
     document.package = pkg

     spdx_file = File('./some/path/tofile')
     spdx_file.name = './some/path/tofile'
     spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
     lgpl = License.from_identifier('LGPL-2.1')
     spdx_file.add_lics(lgpl)
     pkg.add_lics_from_file(lgpl)

     # validate() returns the validity flag and fills the list passed in.
     reported = []
     assert not document.validate(reported)
     assert ['No creators defined, must have at least one.'] == reported
コード例 #8
0
    def test_document_multiple_packages(self):
        """Two packages added through add_package() both end up in the document."""
        document = Document(
            Version(2, 1),
            License.from_identifier('CC0-1.0'),
            'Sample_Document-V2.1',
            spdx_id='SPDXRef-DOCUMENT',
            namespace=
            'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301'
        )
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        # Both packages are configured identically apart from name and id.
        package_specs = (
            ('some/path1', 'SPDXRef-Package1'),
            ('some/path2', 'SPDXRef-Package2'),
        )
        for pkg_name, pkg_id in package_specs:
            pkg = Package(name=pkg_name, download_location=NoAssert())
            pkg.spdx_id = pkg_id
            pkg.cr_text = 'Some copyrught'
            pkg.files_verified = False
            pkg.license_declared = NoAssert()
            pkg.conc_lics = NoAssert()
            document.add_package(pkg)

        assert len(document.packages) == 2
コード例 #9
0
def handle_file_licenses(licenses: list, licenses_dict: dict) -> tuple:
    """Map raw license entries onto SPDX license objects.

    Args:
        licenses: Iterable of dict-like license entries. Each entry is passed
            to ``fix_license`` first and is then read through its
            ``'spdxName'`` key (and ``'name'`` when the lookup fails).
        licenses_dict: Mapping from SPDX name to a license description dict
            providing ``'licenseId'`` and ``'isDeprecatedLicenseId'``.

    Returns:
        A ``(found_lics, extracted_licenses)`` tuple. ``found_lics`` is a set
        of ``License`` objects for every entry found in ``licenses_dict``; if
        none was found it holds a single ``NoAssert()``.
        ``extracted_licenses`` is a list of ``ExtractedLicense`` objects, one
        for each deprecated license and one for each entry that could not be
        looked up.
    """
    def create_ext_license(name):
        # The same name doubles as identifier, full name and text.
        ext_license = ExtractedLicense(identifier=name)
        ext_license.full_name = name
        ext_license.text = name

        return ext_license

    found_lics = set()
    extracted_licenses = []
    for lic in licenses:
        fix_license(lic)  # TODO: MOVE TO SDK
        try:
            spdx_license_dict = licenses_dict[lic['spdxName']]
            logging.debug(f"Found license: {spdx_license_dict['licenseId']}")
            spdx_license = License(full_name=spdx_license_dict['licenseId'],
                                   identifier=lic['spdxName'])
            found_lics.add(spdx_license)
            if spdx_license_dict['isDeprecatedLicenseId']:
                logging.debug(f"License {lic['spdxName']} is deprecated")
                extracted_licenses.append(create_ext_license(lic['spdxName']))
        except KeyError:
            # Any missing key above (unknown SPDX name or incomplete entry)
            # lands here; the license is recorded as an extracted license.
            logging.warning(
                f"License with identifier: {lic['name']} was not found")
            extracted_licenses.append(create_ext_license(lic['name']))

    if not found_lics:
        found_lics.add(NoAssert())

    return found_lics, extracted_licenses
コード例 #10
0
 def test_creation(self):
     """A new Document keeps its version and data license and has no comment."""
     afl = License(full_name='Academic Free License v1.1',
                   identifier='AFL-1.1')
     document = Document(version=Version(major=1, minor=2),
                         data_license=afl)

     assert document.comment is None
     assert document.version == Version(1, 2)
     assert document.data_license.identifier == 'AFL-1.1'
コード例 #11
0
    def _get_mini_doc(self):
        """Return an SPDX 2.1 document holding one nameless, file-less package."""
        document = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
        creation = document.creation_info
        creation.add_creator(Tool('ScanCode'))
        creation.set_created_now()

        pkg = Package(download_location=NoAssert())
        document.package = pkg
        pkg.license_declared = NoAssert()
        pkg.conc_lics = NoAssert()
        return document
コード例 #12
0
def get_license_obj(lic_id: str, licenses_dict: dict) -> License:
    """Return the License described under ``lic_id`` in ``licenses_dict``.

    When ``lic_id`` is absent (or maps to an empty value) a ``NoAssert()``
    is returned instead.
    """
    entry = licenses_dict.get(lic_id)
    if not entry:
        return NoAssert()

    return License(full_name=entry['name'], identifier=entry['licenseId'])
コード例 #13
0
    def generate_spdx_document(self) -> Document:
        """Build the SPDX document for this object.

        The document carries its name, namespace, a fixed comment, the
        CC0-1.0 data license, the creators (person, tool and - unless this is
        a dependency - organisation), a review entry (again only when this is
        not a dependency), one package and the external document references.

        Illustrative SPDX document section (the values are only an example;
        note that this method sets the version to 1.2):
        SPDXVersion: SPDX-2.1
        DataLicense: CC0-1.0
        SPDXID: SPDXRef-DOCUMENT
        DocumentName: mbed-targets
        DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-3c4714e6-a7b1-4574-abb8-861149cbc590
        Creator: Person: Anonymous ()
        Creator: Organization: Anonymous ()
        Creator: Tool: reuse-0.8.0
        Created: 2020-01-20T17:53:41Z
        CreatorComment: <text>
        This document was created automatically using available reuse information consistent with REUSE.
        </text>

        Returns:
            the populated document
        """
        document = Document()
        document.version = Version(1, 2)
        document.name = determine_spdx_value(self.document_name)
        document.namespace = determine_spdx_value(self.document_namespace)
        document.spdx_id = "SPDXRef-DOCUMENT"
        document.comment = determine_spdx_value(
            "This document was created automatically using available information from python packages."
        )
        document.data_license = License.from_identifier("CC0-1.0")

        # Creators are added in a fixed order: person, organisation, tool.
        creation = document.creation_info
        creation.add_creator(Person(self.author, self.author_email))
        if not self._is_dependency:
            organisation = Organization(self.organisation,
                                        self.organisation_email)
            creation.add_creator(organisation)
        creation.add_creator(Tool(self.tool_name))
        creation.set_created_now()

        # Only non-dependency documents carry a review entry.
        if not self._is_dependency:
            reviewer = Person(
                determine_spdx_value(self.reviewer),
                determine_spdx_value(self.reviewer_email),
            )
            review = Review(reviewer)
            review.set_review_date_now()
            document.add_review(review)

        # FIXME with current tooling and specification, only one package can
        #  be described in a file and hence, all dependencies are described
        #  in separate files. Find out what to do with dependencies when new
        #  tools are released as it is not entirely clear in the specification
        # The object returned by self.generate_spdx_package() exposes its own
        # generate_spdx_package(), whose result becomes the document package.
        package_source = self.generate_spdx_package()
        document.package = package_source.generate_spdx_package()

        for reference in self.external_refs:
            document.add_ext_document_reference(
                reference.generate_external_reference())
        return document
コード例 #14
0
def test_generate_file_report_file_simple(fake_repository):
    """Smoke test: generating a report for a plain source file does not crash
    and yields the expected license and copyright.
    """
    report_info = FileReport.generate(Project(fake_repository),
                                      "src/source_code.py")
    spdxfile = report_info.file_report.spdxfile

    expected_licenses = [License.from_identifier("GPL-3.0-or-later")]
    assert spdxfile.licenses_in_file == expected_licenses
    assert spdxfile.copyright == "2017 Mary Sue"
    assert not report_info.bad_licenses
    assert not report_info.missing_licenses
コード例 #15
0
def test_generate_file_report_file_from_different_cwd(fake_repository):
    """Generate a report via an absolute-style path after leaving the repo."""
    # Move away from the repository before the project is created.
    os.chdir("/")
    source_path = fake_repository / "src/source_code.py"
    report_info = FileReport.generate(Project(fake_repository), source_path)
    spdxfile = report_info.file_report.spdxfile

    expected_licenses = [License.from_identifier("GPL-3.0-or-later")]
    assert spdxfile.licenses_in_file == expected_licenses
    assert spdxfile.copyright == "2017 Mary Sue"
    assert not report_info.bad_licenses
    assert not report_info.missing_licenses
コード例 #16
0
 def test_document_validate_failures_returns_informative_messages(self):
     """A named document without creators is invalid and reports one message."""
     document = Document(
         Version(2, 1),
         License.from_identifier('CC0-1.0'),
         'Sample_Document-V2.1',
         spdx_id='SPDXRef-DOCUMENT',
         namespace=
         'https://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301'
     )
     pkg = Package('some/path', NoAssert())
     document.package = pkg

     spdx_file = File('./some/path/tofile')
     spdx_file.name = './some/path/tofile'
     spdx_file.spdx_id = 'SPDXRef-File'
     spdx_file.chk_sum = Algorithm('SHA1', 'SOME-SHA1')
     lgpl = License.from_identifier('LGPL-2.1')
     spdx_file.add_lics(lgpl)
     pkg.add_lics_from_file(lgpl)

     # validate() returns the validity flag and fills the list passed in.
     reported = []
     assert not document.validate(reported)
     assert ['No creators defined, must have at least one.'] == reported
コード例 #17
0
    def generate_spdx_file(self) -> File:
        """Build the SPDX file entry for this source file.

        The entry is typed as a source file and receives the SHA1 checksum,
        an id of the form ``SPDXRef-<id>``, the copyright value and the
        licence, which is used both as concluded licence and as licence found
        in the file.

        Illustrative SPDX file section:
        FileName: ./tests/test_mbed_targets.py
        SPDXID: SPDXRef-cb9cce30c285e6083c2d19a463cbe592
        FileChecksum: SHA1: d3db49873bd2b1cab45bf81e7d88617dea6caaff
        LicenseConcluded: NOASSERTION
        FileCopyrightText: NONE

        Returns:
            the populated file entry
        """
        def licence_object():
            # A fresh License is built for every use, as two separate
            # objects are attached to the file below.
            return License.from_identifier(
                str(determine_spdx_value(self.licence)))

        spdx_file = File(determine_spdx_value(self.unix_relative_path))
        spdx_file.type = FileType.SOURCE
        spdx_file.comment = determine_spdx_value(None)
        spdx_file.chk_sum = Algorithm("SHA1", self.sha1_check_sum)
        spdx_file.conc_lics = licence_object()
        spdx_file.spdx_id = f"SPDXRef-{self.id}"
        spdx_file.copyright = determine_spdx_value(self.copyright)
        spdx_file.add_lics(licence_object())
        return spdx_file
コード例 #18
0
    def serialize(self, packages: List[Package]) -> str:
        """Render ``packages`` and their dependencies as an SPDX document.

        Every package (and, recursively, every entry of its
        ``dependencies``) becomes an SPDX package with an id of the form
        ``SPDXRef-<n>``, numbered from 1 in the order packages are visited.
        A dependency records a PACKAGE_OF relationship to the package it was
        reached from. A package is added to the document after its
        dependencies have been added.

        Args:
            packages: Top-level packages to serialize; each must provide
                ``package_name``, ``version`` and ``dependencies``.

        Returns:
            The text produced by ``write_document`` for the built document.
        """
        doc = Document(name="Translated SBOM",
                       namespace=SPDXNone(),
                       spdx_id="SPDXRef-DOCUMENT")
        doc.version = Version(2, 1)
        doc.comment = 'Translated with Decoder Ring'
        doc.data_license = License.from_identifier('CC0-1.0')
        doc.creation_info.add_creator(Tool("Decoder Ring"))
        doc.creation_info.set_created_now()

        # SPDX ids within the document are formed from a simple counter
        # shared by all recursive add_package() calls.
        next_id = 1

        def add_package(package, parent=None):
            """Recursively add a package and its dependencies."""
            nonlocal next_id
            spdxpackage = SpdxPackage(name=package.package_name,
                                      version=package.version)
            spdxpackage.spdx_id = f'SPDXRef-{next_id}'
            next_id += 1
            spdxpackage.homepage = SPDXNone()
            spdxpackage.cr_text = NoAssert()
            spdxpackage.download_location = UnKnown()
            spdxpackage.files_analyzed = False
            spdxpackage.conc_lics = NoAssert()
            spdxpackage.license_declared = NoAssert()
            spdxpackage.licenses_from_files = [NoAssert()]
            # If we have a parent, be sure to list the relationship. Use an
            # identity check so a custom __eq__/__ne__ on the package type
            # cannot influence the result.
            if parent is not None:
                spdxpackage.add_relationship(
                    Relationship(spdxpackage, RelationshipOptions.PACKAGE_OF,
                                 parent))

            # Go through the same process for the dependencies.
            for dep in package.dependencies:
                add_package(dep, parent=spdxpackage)

            # Finally add it to the document.
            doc.add_package(spdxpackage)

        for package in packages:
            add_package(package)

        out = io.StringIO()
        write_document(doc, out)
        return out.getvalue()
コード例 #19
0
 def test_creation(self):
     """A Document keeps its core fields and the external reference added."""
     afl = License(full_name='Academic Free License v1.1',
                   identifier='AFL-1.1')
     document = Document(version=Version(major=2, minor=1),
                         data_license=afl)
     external_ref = ExternalDocumentRef(
         'DocumentRef-spdx-tool-2.1',
         'https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301',
         Algorithm('SHA1', 'SOME-SHA1'))
     document.add_ext_document_reference(external_ref)

     assert document.comment is None
     assert document.version == Version(2, 1)
     assert document.data_license.identifier == 'AFL-1.1'

     # The reference just added must be the last one stored on the document.
     last_ref = document.ext_document_references[-1]
     assert last_ref.external_document_id == 'DocumentRef-spdx-tool-2.1'
     assert last_ref.spdx_document_uri == 'https://spdx.org/spdxdocs/spdx-tools-v2.1-3F2504E0-4F89-41D3-9A0C-0305E82C3301'
     assert last_ref.check_sum.identifier == 'SHA1'
     assert last_ref.check_sum.value == 'SOME-SHA1'
コード例 #20
0
def create_document(token: str) -> Document:
    """Create the SBOM document header for the scope identified by ``token``.

    The document is named after the scope, uses the namespace configured in
    ``extra_conf`` and SPDX version 2.2 with the CC0-1.0 data license. Its
    creation info gets the current time plus three creators, added in this
    order: organization, tool, person.
    """
    logging.debug("Creating SBOM Document section")
    scope_name = ws_conn.get_scope_name_by_token(token)
    document = Document(
        name=f"WhiteSource {scope_name} SBOM report",
        namespace=extra_conf.get('namespace'),
        spdx_id="SPDXRef-DOCUMENT",
        version=version.Version(2, 2),
        data_license=License.from_identifier("CC0-1.0"),
    )

    logging.debug("Creating SBOM Creation Info section")
    creation = document.creation_info
    creation.set_created_now()
    creators = (
        creationinfo.Organization(ws_conn.get_name(),
                                  extra_conf.get('org_email')),
        creationinfo.Tool("White Source SBOM Report Generator"),
        creationinfo.Person(extra_conf.get('person'),
                            extra_conf.get('person_email')),
    )
    for creator in creators:
        document.creation_info.add_creator(creator)
    logging.debug("Finished SBOM Document section")

    return document
コード例 #21
0
    def generate(cls, project: Project, path: PathLike) -> FileReportInfo:
        """Generate a FileReport from a path in a Project.

        Besides the report itself, the returned FileReportInfo carries the
        license identifiers that are not in the project's license map (bad)
        and those that are known but not among the project's licenses
        (missing).

        Raises:
            OSError: if ``path`` is not an existing file.
        """
        file_path = Path(path)
        if not file_path.is_file():
            raise OSError("{} is not a file".format(file_path))

        # pylint: disable=protected-access
        rel_path = project._relative_from_root(file_path)
        report = cls("./" + str(rel_path), file_path)

        # Checksum first: the SPDX id is an MD5 over the relative path
        # followed by the checksum value.
        report.spdxfile.chk_sum = _checksum(file_path)
        id_hash = md5()
        for part in (str(rel_path), report.spdxfile.chk_sum.value):
            id_hash.update(part.encode("utf-8"))
        report.spdxfile.spdx_id = "SPDXRef-{}".format(id_hash.hexdigest())

        unknown_ids = set()
        absent_ids = set()
        spdx_info = project.spdx_info_of(file_path)
        for expression in spdx_info.spdx_expressions:
            for identifier in _LICENSING.license_keys(expression):
                if identifier not in project.license_map:
                    # Bad license: not present in the license map.
                    unknown_ids.add(identifier)
                elif identifier not in project.licenses:
                    # Missing license: mapped, but absent from the project.
                    absent_ids.add(identifier)

                # Every identifier is added to the report, good or not.
                report.spdxfile.add_lics(License.from_identifier(identifier))

        # Copyright text, one line per copyright statement.
        report.spdxfile.copyright = "\n".join(spdx_info.copyright_lines)

        return FileReportInfo(report, unknown_ids, absent_ids)
コード例 #22
0
 def test_correct_data_lics(self):
     """Setting a data license by identifier stores the matching License."""
     identifier = 'CC0-1.0'
     self.builder.set_doc_data_lics(self.document, identifier)
     expected = License.from_identifier(identifier)
     assert self.document.data_license == expected
コード例 #23
0
    import sys
    import codecs
    from spdx.writers.tagvalue import write_document, InvalidDocumentError
    from spdx.document import Document, License, LicenseConjuction, ExtractedLicense
    from spdx.version import Version
    from spdx.creationinfo import Person
    from spdx.review import Review
    from spdx.package import Package
    from spdx.file import File, FileType
    from spdx.checksum import Algorithm
    from spdx.utils import SPDXNone, NoAssert, UnKnown

    # Document-level metadata: SPDX version 1.2, a free-form comment and the
    # CC0-1.0 data license.
    doc = Document()
    doc.version = Version(1, 2)
    doc.comment = 'Example Document'
    doc.data_license = License.from_identifier('CC0-1.0')
    # Creation info: one person as creator, stamped with the current time.
    doc.creation_info.add_creator(Person('Alice', '*****@*****.**'))
    doc.creation_info.set_created_now()
    # A single review by a person without an e-mail address (None).
    review = Review(Person('Joe', None))
    review.set_review_date_now()
    review.comment = 'Joe reviewed this document'
    doc.add_review(review)
    # File
    testfile1 = File('TestFile1')
    testfile1.type = FileType.BINARY
    testfile1.comment = 'This is a test file.'
    testfile1.chk_sum = Algorithm('SHA1', 'c537c5d99eca5333f23491d47ededd083fefb7ad')
    # BSD-2-Clause is set both as the concluded license and as a license
    # found in the file; two separate License objects are created.
    testfile1.conc_lics = License.from_identifier('BSD-2-Clause')
    testfile1.add_lics(License.from_identifier('BSD-2-Clause'))
    # SPDXNone() is assigned as the copyright value.
    testfile1.copyright = SPDXNone()
    testfile1.add_artifact('name', 'TagWriteTest')
コード例 #24
0
def write_spdx(version, notice, scanned_files, input, output_file, as_tagvalue=True):
    """
    Write scan output formatted as SPDX Tag/value or RDF.

    A single SPDX 2.1 document with one package is built from the scan
    results and written to `output_file`.

    - `version` is appended to 'ScanCode ' to name the creator tool.
    - `notice` becomes the document comment.
    - `scanned_files` is an iterable of mappings read through `.get()` with
      the keys 'path', 'sha1', 'licenses' and 'copyrights'.
    - `input` is the scanned path; when it is not a directory its parent
      directory is used as the package root.
    - `output_file` is an object with a `write()` method receiving the result.
    - `as_tagvalue` selects the Tag/value writer when true, the RDF writer
      otherwise.
    """
    absinput = abspath(input)

    # The package root is the scanned directory, or the directory holding
    # the scanned file.
    if os.path.isdir(absinput):
        input_path = absinput
    else:
        input_path = os.path.dirname(absinput)

    doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
    doc.comment = notice

    doc.creation_info.add_creator(Tool('ScanCode ' + version))
    doc.creation_info.set_created_now()

    package = doc.package = Package(
        name=os.path.basename(input_path),
        download_location=NoAssert()
    )

    # Use a set of unique copyrights for the package.
    package.cr_text = set()

    # Both flags stay True only while every file seen so far reported an
    # empty (but not None) list of licenses / copyrights.
    all_files_have_no_license = True
    all_files_have_no_copyright = True

    for file_data in scanned_files:
        # Construct the absolute path in case we need to access the file
        # to calculate its SHA1.
        file_entry = File(os.path.join(input_path, file_data.get('path')))

        file_sha1 = file_data.get('sha1')
        if not file_sha1:
            if os.path.isfile(file_entry.name):
                # Calculate the SHA1 in case it is missing, e.g. for empty files.
                file_sha1 = file_entry.calc_chksum()
            else:
                # Skip directories.
                continue

        # Restore the relative file name as that is what we want in
        # SPDX output (with explicit leading './').
        file_entry.name = './' + file_data.get('path')
        file_entry.chk_sum = Algorithm('SHA1', file_sha1)

        # Three cases for licenses: a non-empty list (licenses detected),
        # None (recorded as NoAssert) and an empty value (recorded as
        # SPDXNone).
        file_licenses = file_data.get('licenses')
        if file_licenses:
            all_files_have_no_license = False
            for file_license in file_licenses:
                spdx_id = file_license.get('spdx_license_key')
                if spdx_id:
                    # spdx_id = spdx_id.rstrip('+')
                    spdx_license = License.from_identifier(spdx_id)
                else:
                    # No SPDX key: describe the license as an extracted
                    # license with a LicenseRef- identifier.
                    license_key = file_license.get('key')
                    # FIXME: we should prefix this with ScanCode-
                    licenseref_id = 'LicenseRef-' + license_key
                    spdx_license = ExtractedLicense(licenseref_id)
                    spdx_license.name = file_license.get('short_name')
                    comment = 'See details at https://github.com/nexB/scancode-toolkit/blob/develop/src/licensedcode/data/licenses/%s.yml\n' % license_key
                    spdx_license.comment = comment
                    text = file_license.get('matched_text')
                    # always set some text, even if we did not extract the matched text
                    if not text:
                        text = comment
                    spdx_license.text = text
                    doc.add_extr_lic(spdx_license)

                # Add licenses in the order they appear in the file. Maintaining the order
                # might be useful for provenance purposes.
                file_entry.add_lics(spdx_license)
                package.add_lics_from_file(spdx_license)

        elif file_licenses is None:
            all_files_have_no_license = False
            file_entry.add_lics(NoAssert())

        else:
            file_entry.add_lics(SPDXNone())

        file_entry.conc_lics = NoAssert()

        # Copyrights follow the same three-way split as licenses above.
        file_copyrights = file_data.get('copyrights')
        if file_copyrights:
            all_files_have_no_copyright = False
            # Temporarily a list of statements; joined into text below.
            file_entry.copyright = []
            for file_copyright in file_copyrights:
                file_entry.copyright.extend(file_copyright.get('statements'))

            package.cr_text.update(file_entry.copyright)

            # Create a text of copyright statements in the order they appear in the file.
            # Maintaining the order might be useful for provenance purposes.
            file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'

        elif file_copyrights is None:
            all_files_have_no_copyright = False
            file_entry.copyright = NoAssert()

        else:
            file_entry.copyright = SPDXNone()


        package.add_file(file_entry)

    # With no files at all, emit a comment line in the syntax of the chosen
    # output format; processing still continues below.
    if len(package.files) == 0:
        if as_tagvalue:
            output_file.write("# No results for package '{}'.\n".format(package.name))
        else:
            output_file.write("<!-- No results for package '{}'. -->\n".format(package.name))

    # Remove duplicate licenses from the list for the package.
    unique_licenses = set(package.licenses_from_files)
    if not len(package.licenses_from_files):
        if all_files_have_no_license:
            package.licenses_from_files = [SPDXNone()]
        else:
            package.licenses_from_files = [NoAssert()]
    else:
        # List license identifiers alphabetically for the package.
        package.licenses_from_files = sorted(unique_licenses, key=lambda x: x.identifier)

    if len(package.cr_text) == 0:
        if all_files_have_no_copyright:
            package.cr_text = SPDXNone()
        else:
            package.cr_text = NoAssert()
    else:
        # Create a text of alphabetically sorted copyright
        # statements for the package.
        package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'

    package.verif_code = doc.package.calc_verif_code()
    package.license_declared = NoAssert()
    package.conc_lics = NoAssert()

    # Pick the writer matching the requested output format.
    if as_tagvalue:
        from spdx.writers.tagvalue import write_document
    else:
        from spdx.writers.rdf import write_document

    # The spdx-tools write_document returns either:
    # - unicode for tag values
    # - UTF8-encoded bytes for rdf because somehow the rdf and xml
    #   libraries do the encoding
    # The file passed by ScanCode for output is always opened in binary
    # mode and needs to receive UTF8-encoded bytes.
    # Therefore in one case we do nothing (rdf) and in the other case we
    # encode to UTF8 bytes.

    # NOTE(review): the StringIO module only exists on Python 2; this import
    # fails on Python 3.
    from StringIO import StringIO
    spdx_output = StringIO()
    write_document(doc, spdx_output, validate=True)
    result = spdx_output.getvalue()
    if as_tagvalue:
        result = result.encode('utf-8')
    output_file.write(result)
コード例 #25
0
 def test_from_identifier(self):
     """License.from_identifier('MIT') resolves the full name and the URL."""
     mit_license = License.from_identifier('MIT')
     expected_name = 'MIT License'
     expected_url = 'http://spdx.org/licenses/MIT'
     assert mit_license.full_name == expected_name
     assert mit_license.url == expected_url
コード例 #26
0
 def test_url(self):
     """The license URL is derived from the license identifier."""
     apache = License(identifier='Apache-1.0',
                      full_name='Apache License 1.0')
     expected_url = 'http://spdx.org/licenses/Apache-1.0'
     assert apache.url == expected_url
コード例 #27
0
    import sys
    import codecs
    from spdx.writers.tagvalue import write_document, InvalidDocumentError
    from spdx.document import Document, License, LicenseConjuction, ExtractedLicense
    from spdx.version import Version
    from spdx.creationinfo import Person
    from spdx.review import Review
    from spdx.package import Package
    from spdx.file import File, FileType
    from spdx.checksum import Algorithm
    from spdx.utils import SPDXNone, NoAssert, UnKnown

    # Document-level metadata: SPDX version 1.2, a free-form comment and the
    # CC0-1.0 data license.
    doc = Document()
    doc.version = Version(1, 2)
    doc.comment = 'Example Document'
    doc.data_license = License.from_identifier('CC0-1.0')
    # Creation info: one person as creator, stamped with the current time.
    doc.creation_info.add_creator(Person('Alice', '*****@*****.**'))
    doc.creation_info.set_created_now()
    # A single review by a person without an e-mail address (None).
    review = Review(Person('Joe', None))
    review.set_review_date_now()
    review.comment = 'Joe reviewed this document'
    doc.add_review(review)
    # File
    testfile1 = File('TestFile1')
    testfile1.type = FileType.BINARY
    testfile1.comment = 'This is a test file.'
    testfile1.chk_sum = Algorithm('SHA1',
                                  'c537c5d99eca5333f23491d47ededd083fefb7ad')
    # BSD-2-Clause is set both as the concluded license and as a license
    # found in the file; two separate License objects are created.
    testfile1.conc_lics = License.from_identifier('BSD-2-Clause')
    testfile1.add_lics(License.from_identifier('BSD-2-Clause'))
    # SPDXNone() is assigned as the copyright value.
    testfile1.copyright = SPDXNone()
コード例 #28
0
 def test_from_identifier(self):
     """License.from_identifier('MIT') resolves the full name and the URL."""
     mit_license = License.from_identifier('MIT')
     expected_name = 'MIT License'
     expected_url = 'http://spdx.org/licenses/MIT'
     assert mit_license.full_name == expected_name
     assert mit_license.url == expected_url
コード例 #29
0
def write_formatted_output(scanners, files_count, version, notice,
                           scanned_files, format, options, input, output_file,
                           _echo, _save):
    """
    Save scan results to file or screen.

    ``format`` selects the output: 'html', 'html-app', 'json', 'json-pp',
    'spdx-tv' or 'spdx-rdf'. Any other value raises
    ``Exception('Unknown format')``.

    ``scanned_files`` is an iterable of per-file scan result mappings and
    ``output_file`` is the file-like object that receives the output.
    ``files_count``, ``version``, ``notice`` and ``options`` are copied into
    the JSON header; ``version`` is also used as the SPDX creator tool
    version. ``input`` is the scanned path. ``_echo(message, fg=...)`` and
    ``_save(message, output_file)`` are callables used to report problems.
    ``scanners`` is accepted but not used here.
    """

    if format == 'html':
        for template_chunk in as_template(scanned_files, files_count,
                                          output_file):
            try:
                output_file.write(template_chunk)
            except Exception as e:
                extra_context = 'ERROR: Failed to write output to HTML for: ' + repr(
                    template_chunk)
                _echo(extra_context, fg='red')
                _save(extra_context + '\n', output_file)
                e.args += (extra_context, )
                # A bare raise re-raises the same exception and keeps its
                # original traceback.
                raise

    elif format == 'html-app':
        output_file.write(as_html_app(input, output_file))
        try:
            create_html_app_assets(scanned_files, output_file)
        except HtmlAppAssetCopyWarning:
            _echo('\nHTML app creation skipped when printing to stdout.',
                  fg='yellow')
            _save('\nHTML app creation skipped when printing to stdout.',
                  output_file)
        except HtmlAppAssetCopyError:
            _echo('\nFailed to create HTML app.', fg='red')
            _save('\nFailed to create HTML app.', output_file)

    elif format in ('json', 'json-pp'):
        import simplejson as json

        meta = OrderedDict()
        meta['scancode_notice'] = notice
        meta['scancode_version'] = version
        meta['scancode_options'] = options
        meta['files_count'] = files_count
        meta['files'] = scanned_files

        # Only the layout differs between the pretty-printed and the compact
        # flavor; everything else is shared.
        if format == 'json-pp':
            layout = dict(indent=2 * ' ')
        else:
            layout = dict(separators=(',', ':'))
        output_file.write(
            unicode(
                json.dumps(meta,
                           iterable_as_array=True,
                           encoding='utf-8',
                           **layout)))
        output_file.write('\n')

    elif format in ('spdx-tv', 'spdx-rdf'):
        from spdx.checksum import Algorithm
        from spdx.creationinfo import Tool
        from spdx.document import Document, License
        from spdx.file import File
        from spdx.package import Package
        from spdx.utils import NoAssert
        from spdx.utils import SPDXNone
        from spdx.version import Version

        input = abspath(input)

        if os.path.isdir(input):
            input_path = input
        else:
            input_path = os.path.dirname(input)

        doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))

        doc.creation_info.add_creator(Tool('ScanCode ' + version))
        doc.creation_info.set_created_now()

        doc.package = Package(os.path.basename(input_path), NoAssert())

        # Use a set of unique copyrights for the package.
        doc.package.cr_text = set()

        all_files_have_no_license = True
        all_files_have_no_copyright = True

        for file_data in scanned_files:
            # Construct the absolute path in case we need to access the file
            # to calculate its SHA1.
            file_entry = File(os.path.join(input_path, file_data.get('path')))

            file_sha1 = file_data.get('sha1')
            if not file_sha1:
                if os.path.isfile(file_entry.name):
                    # Calculate the SHA1 in case it is missing, e.g. for empty files.
                    file_sha1 = file_entry.calc_chksum()
                else:
                    # Skip directories.
                    continue

            # Restore the relative file name as that is what we want in
            # SPDX output (with explicit leading './').
            file_entry.name = './' + file_data.get('path')
            file_entry.chk_sum = Algorithm('SHA1', file_sha1)

            file_licenses = file_data.get('licenses')
            if file_licenses:
                all_files_have_no_license = False
                for file_license in file_licenses:
                    spdx_id = file_license.get('spdx_license_key')
                    if spdx_id:
                        spdx_license = License.from_identifier(spdx_id)
                    else:
                        license_key = 'LicenseRef-' + file_license.get('key')
                        spdx_license = License(file_license.get('short_name'),
                                               license_key)

                    # Add licenses in the order they appear in the file. Maintaining the order
                    # might be useful for provenance purposes.
                    file_entry.add_lics(spdx_license)
                    doc.package.add_lics_from_file(spdx_license)
            elif file_licenses is None:
                # No 'licenses' entry in the scan data for this file: no
                # assertion is made.
                all_files_have_no_license = False
                file_entry.add_lics(NoAssert())
            else:
                # A 'licenses' entry exists but is empty.
                file_entry.add_lics(SPDXNone())

            file_entry.conc_lics = NoAssert()

            file_copyrights = file_data.get('copyrights')
            if file_copyrights:
                all_files_have_no_copyright = False
                file_entry.copyright = []
                for file_copyright in file_copyrights:
                    file_entry.copyright.extend(
                        file_copyright.get('statements'))

                doc.package.cr_text.update(file_entry.copyright)

                # Create a text of copyright statements in the order they appear in the file.
                # Maintaining the order might be useful for provenance purposes.
                file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'
            elif file_copyrights is None:
                # No 'copyrights' entry in the scan data for this file.
                all_files_have_no_copyright = False
                file_entry.copyright = NoAssert()
            else:
                file_entry.copyright = SPDXNone()

            doc.package.add_file(file_entry)

        if not doc.package.files:
            # Nothing to report: emit a comment in the syntax of the chosen
            # flavor instead of a document.
            if format == 'spdx-tv':
                output_file.write("# No results for package '{}'.\n".format(
                    doc.package.name))
            else:
                output_file.write(
                    "<!-- No results for package '{}'. -->\n".format(
                        doc.package.name))
            return

        if not doc.package.licenses_from_files:
            if all_files_have_no_license:
                doc.package.licenses_from_files = [SPDXNone()]
            else:
                doc.package.licenses_from_files = [NoAssert()]
        else:
            # Remove duplicate licenses and list license identifiers
            # alphabetically for the package.
            doc.package.licenses_from_files = sorted(
                set(doc.package.licenses_from_files),
                key=lambda x: x.identifier)

        if not doc.package.cr_text:
            if all_files_have_no_copyright:
                doc.package.cr_text = SPDXNone()
            else:
                doc.package.cr_text = NoAssert()
        else:
            # Create a text of alphabetically sorted copyright statements for the package.
            doc.package.cr_text = '\n'.join(sorted(doc.package.cr_text)) + '\n'

        doc.package.verif_code = doc.package.calc_verif_code()
        doc.package.license_declared = NoAssert()
        doc.package.conc_lics = NoAssert()

        # As the spdx-tools package can only write the document to a "str" file but ScanCode provides a "unicode" file,
        # write to a "str" buffer first and then manually write the value to a "unicode" file.
        from StringIO import StringIO

        if format == 'spdx-tv':
            from spdx.writers.tagvalue import write_document
        else:
            from spdx.writers.rdf import write_document

        str_buffer = StringIO()
        write_document(doc, str_buffer)
        output_file.write(str_buffer.getvalue())

    else:
        raise Exception('Unknown format')
コード例 #30
0
ファイル: core.py プロジェクト: spdx/spdx-py-build-tool
    def create_spdx_document(self):
        """
        Write identifier scan results as SPDX Tag/value or RDF.

        Build a new ``Document`` (stored on ``self.spdx_document``) with one
        ``Package`` and one ``File`` entry per item of
        ``self.id_scan_results`` that is not skipped, then serialize it to
        ``self.output_file``. ``self.doc_type`` selects the Tag/value writer
        when equal to ``TAG_VALUE`` and the RDF writer otherwise. When no
        file entry was added, only a "No results" comment line is written.
        """
        logging.basicConfig(level=logging.INFO)
        logging.info("Creating spdx document")
        self.get_output_file()
        self.spdx_document = Document(
            version=Version(2, 1),
            data_license=License.from_identifier(
                self.code_extra_params["lic_identifier"]),
        )
        self.set_creation_info()

        package = self.spdx_document.package = Package(
            download_location=NoAssert(), version=self.get_package_version())
        self.set_package_info(package)

        is_tag_value = self.doc_type == TAG_VALUE
        file_license_list = []
        if is_dir(self.path_or_file):
            # The index counts every scan result, including skipped ones, so
            # the generated file references keep their original numbering.
            for idx, file_data in enumerate(self.id_scan_results, 1):
                file_name = file_data["FileName"]
                # The file is no longer opened here: the handle was never
                # used nor closed, which leaked one descriptor per result.
                if should_skip_file(file_name, self.output_file_name):
                    continue

                file_entry = File(
                    name=file_name.replace(self.path_or_file, "."),
                    chk_sum=Algorithm("SHA1", get_file_hash(file_name) or ""),
                )
                if is_tag_value:
                    spdx_license = License.from_identifier(
                        file_data["SPDXID"])
                else:
                    # The previous membership test ran right after appending
                    # the id to the list it checked, so it was always true;
                    # the extracted license is simply created every time.
                    licenseref_id = "SPDXID-Doc-Generator-" + file_data[
                        "SPDXID"]
                    spdx_license = ExtractedLicense(licenseref_id)
                    spdx_license.name = NoAssert()
                    comment = "N/A"
                    spdx_license.comment = comment
                    # Fall back to the comment when the NoAssert instance is
                    # falsy. NOTE(review): whether that can happen depends
                    # on NoAssert's truthiness -- confirm.
                    text = NoAssert()
                    if not text:
                        text = comment
                    spdx_license.text = text
                    self.spdx_document.add_extr_lic(spdx_license)
                    package.add_lics_from_file(spdx_license)
                file_entry.add_lics(spdx_license)
                file_license_list.append(spdx_license)
                file_entry.conc_lics = NoAssert()
                file_entry.copyright = SPDXNone()
                file_entry.spdx_id = self.code_extra_params[
                    "file_ref"].format(idx)
                package.add_file(file_entry)
            if is_tag_value:
                # Register each distinct license once at the package level.
                for spdx_license in set(file_license_list):
                    package.add_lics_from_file(spdx_license)

        if not package.files:
            # Nothing to serialize: emit a comment in the chosen syntax.
            if is_tag_value:
                self.output_file.write(
                    "# No results for package '{}'.\n".format(package.name))
            else:
                self.output_file.write(
                    "<!-- No results for package '{}'. -->\n".format(
                        package.name))
            return

        if is_tag_value:
            from spdx.writers.tagvalue import write_document  # NOQA
        else:
            from spdx.writers.rdf import write_document  # NOQA

        # NOTE(review): both flavors are written to a text buffer, as
        # before; confirm the RDF writer accepts a text stream.
        spdx_output = io.StringIO()
        write_document(self.spdx_document, spdx_output, validate=False)
        result = spdx_output.getvalue()
        if is_tag_value:
            logging.info("SPDX Tag-Value Document created successfully.")
            # Only the Tag/value text is encoded before writing.
            result = result.encode("utf-8")
        else:
            logging.info("SPDX RDF Document created successfully.")
        self.output_file.write(result)
コード例 #31
0
def write_spdx(
        output_file,
        files,
        tool_name,
        tool_version,
        notice,
        package_name='',
        download_location=NoAssert(),
        as_tagvalue=True,
        spdx_version=(2, 2),
        with_notice_text=False,
):
    """
    Write scan output as SPDX Tag/value to ``output_file`` file-like
    object using the ``files`` list of scanned file data.
    Write as RDF XML if ``as_tagvalue`` is False.

    Use the ``notice`` string as a notice included in a document comment.
    Include the ``tool_name`` and ``tool_version`` to indicate which tool is
    producing this SPDX document.
    Use ``package_name`` as a Package name and as a namespace prefix base.
    Use ``download_location`` as the Package download location and the
    ``spdx_version`` tuple as the document version.
    ``with_notice_text`` is accepted but not used here.

    When ``files`` contains no entry of type 'file', only a "No results"
    comment line is written.
    """
    _patch_license_list()

    ns_prefix = '_'.join(package_name.lower().split())

    # NOTE(review): a comment string extended with the license list version
    # used to be built here but was never passed on; the document comment
    # is, and stays, the plain notice.
    doc = Document(
        version=Version(*spdx_version),
        data_license=License.from_identifier('CC0-1.0'),
        comment=notice,
        namespace=f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}',
        license_list_version=scancode_config.spdx_license_list_version,
        name='SPDX Document created by ScanCode Toolkit')

    tool_name = tool_name or 'ScanCode'
    doc.creation_info.add_creator(Tool(f'{tool_name} {tool_version}'))
    doc.creation_info.set_created_now()

    package_id = '001'
    package = doc.package = Package(
        name=package_name,
        download_location=download_location,
        spdx_id=f'SPDXRef-{package_id}',
    )

    # Use a set of unique copyrights for the package.
    package.cr_text = set()

    all_files_have_no_license = True
    all_files_have_no_copyright = True

    # FIXME: this should walk the codebase instead!!!
    for sid, file_data in enumerate(files, 1):

        # Skip directories.
        if file_data.get('type') != 'file':
            continue

        # Set a relative file name as that is what we want in
        # SPDX output (with explicit leading './').
        name = './' + file_data.get('path')
        file_entry = File(spdx_id=f'SPDXRef-{sid}',
                          name=name,
                          chk_sum=Algorithm('SHA1',
                                            file_data.get('sha1') or ''))

        file_licenses = file_data.get('licenses')
        if file_licenses:
            all_files_have_no_license = False
            for file_license in file_licenses:
                license_key = file_license.get('key')

                spdx_id = file_license.get('spdx_license_key')
                if not spdx_id:
                    spdx_id = f'LicenseRef-scancode-{license_key}'
                is_license_ref = spdx_id.lower().startswith('licenseref-')

                if not is_license_ref:
                    spdx_license = License.from_identifier(spdx_id)
                else:
                    spdx_license = ExtractedLicense(spdx_id)
                    spdx_license.name = file_license.get('short_name')
                    # FIXME: replace this with the licensedb URL
                    comment = (
                        f'See details at https://github.com/nexB/scancode-toolkit'
                        f'/blob/develop/src/licensedcode/data/licenses/{license_key}.yml\n'
                    )
                    spdx_license.comment = comment
                    # always set some text, even if we did not extract the
                    # matched text
                    spdx_license.text = file_license.get('matched_text') or comment
                    doc.add_extr_lic(spdx_license)

                # Add licenses in the order they appear in the file. Maintaining
                # the order might be useful for provenance purposes.
                file_entry.add_lics(spdx_license)
                package.add_lics_from_file(spdx_license)

        elif file_licenses is None:
            all_files_have_no_license = False
            file_entry.add_lics(NoAssert())

        else:
            file_entry.add_lics(SPDXNone())

        file_entry.conc_lics = NoAssert()

        file_copyrights = file_data.get('copyrights')
        if file_copyrights:
            all_files_have_no_copyright = False
            statements = [
                file_copyright.get('copyright')
                for file_copyright in file_copyrights
            ]
            package.cr_text.update(statements)

            # Create a text of copyright statements in the order they appear in
            # the file. Maintaining the order might be useful for provenance
            # purposes.
            file_entry.copyright = '\n'.join(statements) + '\n'

        elif file_copyrights is None:
            all_files_have_no_copyright = False
            file_entry.copyright = NoAssert()

        else:
            file_entry.copyright = SPDXNone()

        package.add_file(file_entry)

    if not package.files:
        if as_tagvalue:
            msg = "# No results for package '{}'.\n".format(package.name)
        else:
            # rdf
            msg = "<!-- No results for package '{}'. -->\n".format(
                package.name)
        output_file.write(msg)
        # No document is written without files, so the remaining package
        # bookkeeping would be discarded anyway.
        return

    if not package.licenses_from_files:
        if all_files_have_no_license:
            package.licenses_from_files = [SPDXNone()]
        else:
            package.licenses_from_files = [NoAssert()]
    else:
        # Remove duplicate licenses (one per identifier, the last one seen
        # wins) and list them alphabetically for the package.
        unique_licenses = {
            lic.identifier: lic for lic in package.licenses_from_files
        }
        package.licenses_from_files = sorted(
            unique_licenses.values(),
            key=lambda x: x.identifier,
        )

    if not package.cr_text:
        if all_files_have_no_copyright:
            package.cr_text = SPDXNone()
        else:
            package.cr_text = NoAssert()
    else:
        # Create a text of alphabetically sorted copyright
        # statements for the package.
        package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'

    package.verif_code = doc.package.calc_verif_code()
    package.license_declared = NoAssert()
    package.conc_lics = NoAssert()

    # The spdx-tools write_document returns either:
    # - unicode for tag values
    # - UTF8-encoded bytes for rdf because somehow the rdf and xml
    #   libraries do the encoding and do not return text but bytes
    # The file passed by ScanCode for output is opened in text mode. Therefore
    # in one case we do need to deal with bytes and decode before writing (rdf)
    # and in the other case we deal with text all the way.
    if as_tagvalue:
        from spdx.writers.tagvalue import write_document  # NOQA
        spdx_output = StringIO()
    else:
        from spdx.writers.rdf import write_document  # NOQA
        # rdf is utf-encoded bytes
        spdx_output = BytesIO()

    write_document(doc, spdx_output, validate=False)
    result = spdx_output.getvalue()

    if not as_tagvalue:
        # rdf is utf-encoded bytes
        result = result.decode('utf-8')

    output_file.write(result)
コード例 #32
0
def write_spdx(version,
               notice,
               scanned_files,
               input,
               output_file,
               as_tagvalue=True):
    """
    Write scan output formatted as SPDX Tag/value or RDF.

    ``scanned_files`` is an iterable of per-file scan result mappings and
    ``input`` is the scanned path; its directory base name becomes the
    Package name. ``version`` is appended to the 'ScanCode ' creator tool
    name and ``notice`` is used as the document comment. Write Tag/value to
    ``output_file`` when ``as_tagvalue`` is True and RDF otherwise.

    When no file entry could be built, only a "No results" comment line is
    written and no document is produced.
    """
    absinput = abspath(input)

    if os.path.isdir(absinput):
        input_path = absinput
    else:
        input_path = os.path.dirname(absinput)

    doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
    doc.comment = notice

    doc.creation_info.add_creator(Tool('ScanCode ' + version))
    doc.creation_info.set_created_now()

    package = doc.package = Package(name=os.path.basename(input_path),
                                    download_location=NoAssert())

    # Use a set of unique copyrights for the package.
    package.cr_text = set()

    all_files_have_no_license = True
    all_files_have_no_copyright = True

    for file_data in scanned_files:
        # Construct the absolute path in case we need to access the file
        # to calculate its SHA1.
        file_entry = File(os.path.join(input_path, file_data.get('path')))

        file_sha1 = file_data.get('sha1')
        if not file_sha1:
            if os.path.isfile(file_entry.name):
                # Calculate the SHA1 in case it is missing, e.g. for empty files.
                file_sha1 = file_entry.calc_chksum()
            else:
                # Skip directories.
                continue

        # Restore the relative file name as that is what we want in
        # SPDX output (with explicit leading './').
        file_entry.name = './' + file_data.get('path')
        file_entry.chk_sum = Algorithm('SHA1', file_sha1)

        file_licenses = file_data.get('licenses')
        if file_licenses:
            all_files_have_no_license = False
            for file_license in file_licenses:
                spdx_id = file_license.get('spdx_license_key')
                if spdx_id:
                    spdx_license = License.from_identifier(spdx_id)
                else:
                    license_key = file_license.get('key')
                    # FIXME: we should prefix this with ScanCode-
                    licenseref_id = 'LicenseRef-' + license_key
                    spdx_license = ExtractedLicense(licenseref_id)
                    spdx_license.name = file_license.get('short_name')
                    comment = (
                        'See details at https://github.com/nexB/scancode-toolkit'
                        '/blob/develop/src/licensedcode/data/licenses/%s.yml\n'
                        % license_key)
                    spdx_license.comment = comment
                    text = file_license.get('matched_text')
                    # always set some text, even if we did not extract the matched text
                    if not text:
                        text = comment
                    spdx_license.text = text
                    doc.add_extr_lic(spdx_license)

                # Add licenses in the order they appear in the file. Maintaining the order
                # might be useful for provenance purposes.
                file_entry.add_lics(spdx_license)
                package.add_lics_from_file(spdx_license)

        elif file_licenses is None:
            all_files_have_no_license = False
            file_entry.add_lics(NoAssert())

        else:
            file_entry.add_lics(SPDXNone())

        file_entry.conc_lics = NoAssert()

        file_copyrights = file_data.get('copyrights')
        if file_copyrights:
            all_files_have_no_copyright = False
            file_entry.copyright = []
            for file_copyright in file_copyrights:
                file_entry.copyright.extend(file_copyright.get('statements'))

            package.cr_text.update(file_entry.copyright)

            # Create a text of copyright statements in the order they appear in the file.
            # Maintaining the order might be useful for provenance purposes.
            file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'

        elif file_copyrights is None:
            all_files_have_no_copyright = False
            file_entry.copyright = NoAssert()

        else:
            file_entry.copyright = SPDXNone()

        package.add_file(file_entry)

    if len(package.files) == 0:
        if as_tagvalue:
            output_file.write("# No results for package '{}'.\n".format(
                package.name))
        else:
            output_file.write("<!-- No results for package '{}'. -->\n".format(
                package.name))
        # Stop here: the document is written with validate=True below, and
        # handing the writer a package without any file would follow the
        # "No results" line with a failed or meaningless document.
        return

    if not len(package.licenses_from_files):
        if all_files_have_no_license:
            package.licenses_from_files = [SPDXNone()]
        else:
            package.licenses_from_files = [NoAssert()]
    else:
        # Remove duplicate licenses and list license identifiers
        # alphabetically for the package.
        package.licenses_from_files = sorted(
            set(package.licenses_from_files),
            key=lambda x: x.identifier)

    if len(package.cr_text) == 0:
        if all_files_have_no_copyright:
            package.cr_text = SPDXNone()
        else:
            package.cr_text = NoAssert()
    else:
        # Create a text of alphabetically sorted copyright
        # statements for the package.
        package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'

    package.verif_code = doc.package.calc_verif_code()
    package.license_declared = NoAssert()
    package.conc_lics = NoAssert()

    if as_tagvalue:
        from spdx.writers.tagvalue import write_document
    else:
        from spdx.writers.rdf import write_document

    # The spdx-tools write_document returns either:
    # - unicode for tag values
    # - UTF8-encoded bytes for rdf because somehow the rdf and xml
    #   libraries do the encoding
    # The file passed by ScanCode for output is always opened in binary
    # mode and needs to receive UTF8-encoded bytes.
    # Therefore in one case we do nothing (rdf) and in the other case we
    # encode to UTF8 bytes.

    from StringIO import StringIO
    spdx_output = StringIO()
    write_document(doc, spdx_output, validate=True)
    result = spdx_output.getvalue()
    if as_tagvalue:
        result = result.encode('utf-8')
    output_file.write(result)
コード例 #33
0
    def generate_spdx_package(self) -> Package:
        """Build the SPDX ``Package`` describing this object.

        The package is populated from this object's name, id, version,
        author, URL, licences and description, and from the files returned
        by ``get_spdx_files()``. When there are no files, a single dummy
        file entry is added instead.

        Example of a SPDX package:
        PackageName: eduVPN
        DataFormat: SPDXRef-1
        PackageSupplier: Organization: The Commons Conservancy eduVPN Programme
        PackageHomePage: https://eduvpn.org
        PackageLicenseDeclared: GPL-3.0+
        PackageCopyrightText: 2017, The Commons Conservancy eduVPN Programme
        PackageSummary: <text>EduVPN is designed to allow users to connect
        securely and encrypted to the Internet from any standard device.
                        </text>
        PackageComment: <text>The package includes the following libraries; see
        Relationship information.
                        </text>
        Created: 2017-06-06T09:00:00Z
        PackageDownloadLocation: git://github.com/eduVPN/reponame
        PackageDownloadLocation: git+https://github.com/eduVPN/reponame.git
        PackageDownloadLocation: git+ssh://github.com/eduVPN/reponame.git
        Creator: Person: Jane Doe

        Returns:
            the corresponding package
        """

        def _as_license(raw_licence):
            # Turn a raw licence value into an SPDX License object.
            return License.from_identifier(
                str(determine_spdx_value(raw_licence)))

        spdx_package = Package(
            name=determine_spdx_value(self.name),
            spdx_id=f"SPDXRef-{self.id}",
            download_location=determine_spdx_value(None),
            version=determine_spdx_value(self.version),
            file_name=determine_spdx_value(self.name),
            supplier=None,
            originator=Person(determine_spdx_value(self.author),
                              determine_spdx_value(self.author_email)),
        )
        spdx_package.check_sum = Algorithm("SHA1", str(NoAssert()))
        spdx_package.cr_text = NoAssert()
        spdx_package.homepage = determine_spdx_value(self.url)
        spdx_package.license_declared = _as_license(self.main_licence)
        spdx_package.conc_lics = _as_license(self.licence)
        spdx_package.summary = determine_spdx_value(self.description)
        spdx_package.description = NoAssert()

        spdx_files = self.get_spdx_files()
        if not spdx_files:
            # Has to generate a dummy file because of the following rule in SDK:
            # - Package must have at least one file
            placeholder = SpdxFile(Path(UNKNOWN), self._package_info.root_dir,
                                   self.main_licence)
            spdx_package.verif_code = NoAssert()
            spdx_package.add_file(placeholder.generate_spdx_file())
            spdx_package.add_lics_from_file(_as_license(placeholder.licence))
            return spdx_package

        spdx_package.files_analyzed = True
        for spdx_file in spdx_files:
            spdx_package.add_file(spdx_file.generate_spdx_file())
            spdx_package.add_lics_from_file(_as_license(spdx_file.licence))
            _set_package_copyright(spdx_file, spdx_package)
        # The verification code is computed once every file has been added.
        spdx_package.verif_code = determine_spdx_value(
            spdx_package.calc_verif_code())
        return spdx_package
コード例 #34
0
def write_spdx(output_file, files, tool_name, tool_version, notice, input_file, as_tagvalue=True):
    """
    Write the scan results in `files` to `output_file` as an SPDX 2.1
    document, in Tag/value format by default or as RDF when `as_tagvalue`
    is False.

    Arguments:
    - `output_file`: writable file-like object, expected to be opened in
      text mode.
    - `files`: iterable of scan result mappings. Only the entries whose
      'type' is 'file' are reported; their 'path', 'sha1', 'licenses' and
      'copyrights' values are read.
    - `tool_name`, `tool_version`: joined with a space to name the creator
      tool. An empty `tool_name` is replaced by 'ScanCode'.
    - `notice`: text stored as the document comment.
    - `input_file`: scanned location. The package is named after it when
      it is a directory, otherwise after its parent directory.

    For each file, a 'licenses' or 'copyrights' value of None is recorded
    with `NoAssert()` and an empty value with `SPDXNone()`. When there is
    no file entry at all, only a one-line "No results" comment is written.
    """
    _patch_license_list()

    scanned_location = abspath(input_file)
    package_dir = scanned_location if isdir(scanned_location) else dirname(scanned_location)

    doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
    doc.comment = notice
    creator_name = (tool_name or 'ScanCode') + ' ' + tool_version
    doc.creation_info.add_creator(Tool(creator_name))
    doc.creation_info.set_created_now()

    package = doc.package = Package(
        name=basename(package_dir),
        download_location=NoAssert(),
    )

    # Collect the package copyrights in a set to keep each statement once.
    package.cr_text = set()

    def extracted_license_for(detected):
        # Build a "LicenseRef-scancode-<key>" license for a detection that
        # carries no SPDX license key.
        key = detected.get('key')
        extracted = ExtractedLicense('LicenseRef-scancode-' + key)
        extracted.name = detected.get('short_name')
        details = ('See details at https://github.com/nexB/scancode-toolkit'
                   '/blob/develop/src/licensedcode/data/licenses/%s.yml\n' % key)
        extracted.comment = details
        # Always provide some text: fall back to the comment when the
        # matched text is not available.
        extracted.text = detected.get('matched_text') or details
        return extracted

    # These flags become True as soon as one file has a non-empty value or
    # a None value (as opposed to an empty one) for the corresponding data.
    license_info_seen = False
    copyright_info_seen = False

    # FIXME: this should walk the codebase instead!!!
    for file_data in files:

        # Directories are not reported.
        if file_data.get('type') != 'file':
            continue

        # SPDX output uses relative file names with an explicit leading './'.
        file_entry = File(
            name='./' + file_data.get('path'),
            chk_sum=Algorithm('SHA1', file_data.get('sha1') or ''),
        )

        detected_licenses = file_data.get('licenses')
        if detected_licenses is None:
            license_info_seen = True
            file_entry.add_lics(NoAssert())

        elif not detected_licenses:
            file_entry.add_lics(SPDXNone())

        else:
            license_info_seen = True
            # Licenses are added in detection order, which may be useful
            # for provenance purposes.
            for detected in detected_licenses:
                spdx_key = detected.get('spdx_license_key')
                if spdx_key:
                    spdx_license = License.from_identifier(spdx_key)
                else:
                    spdx_license = extracted_license_for(detected)
                    doc.add_extr_lic(spdx_license)

                file_entry.add_lics(spdx_license)
                package.add_lics_from_file(spdx_license)

        file_entry.conc_lics = NoAssert()

        detected_copyrights = file_data.get('copyrights')
        if detected_copyrights is None:
            copyright_info_seen = True
            file_entry.copyright = NoAssert()

        elif not detected_copyrights:
            file_entry.copyright = SPDXNone()

        else:
            copyright_info_seen = True
            statements = [detected.get('value') for detected in detected_copyrights]
            package.cr_text.update(statements)
            # The file-level text keeps the statements in detection order,
            # which may be useful for provenance purposes.
            file_entry.copyright = '\n'.join(statements) + '\n'

        package.add_file(file_entry)

    if not package.files:
        # Leave a comment in the output, using the comment syntax of the
        # requested format (Tag/value or RDF/XML).
        if as_tagvalue:
            no_results = "# No results for package '{}'.\n"
        else:
            no_results = "<!-- No results for package '{}'. -->\n"
        output_file.write(no_results.format(package.name))

    collected_licenses = package.licenses_from_files
    if collected_licenses:
        # Keep one license per (identifier, full name) pair and list them
        # sorted by identifier.
        by_identity = {}
        for lic in collected_licenses:
            by_identity[(lic.identifier, lic.full_name)] = lic
        package.licenses_from_files = sorted(
            by_identity.values(), key=lambda lic: lic.identifier)
    elif license_info_seen:
        package.licenses_from_files = [NoAssert()]
    else:
        package.licenses_from_files = [SPDXNone()]

    if package.cr_text:
        # The package-level text lists the sorted unique statements.
        package.cr_text = '\n'.join(sorted(package.cr_text)) + '\n'
    elif copyright_info_seen:
        package.cr_text = NoAssert()
    else:
        package.cr_text = SPDXNone()

    package.verif_code = doc.package.calc_verif_code()
    package.license_declared = NoAssert()
    package.conc_lics = NoAssert()

    if not package.files:
        # Nothing to serialize: only the "No results" comment is written.
        return

    # `output_file` is opened in text mode. The Tag/value writer produces
    # text that can be written as-is, while the RDF writer produces
    # UTF-8-encoded bytes that must be decoded before writing.
    if as_tagvalue:
        from spdx.writers.tagvalue import write_document  # NOQA
        buffer = StringIO()
    else:
        from spdx.writers.rdf import write_document  # NOQA
        buffer = BytesIO()

    write_document(doc, buffer, validate=False)
    serialized = buffer.getvalue()
    if not as_tagvalue:
        serialized = serialized.decode('utf-8')

    output_file.write(serialized)