def generate_spdx_document(self) -> Document:
        """Build the SPDX document describing this package.

        The header fields (version, name, namespace, SPDX id, comment and
        data license) are filled in first, followed by the creation
        information (author, tool and creation time). When the package is
        not a dependency, the organisation is also recorded as a creator and
        a review stamped with the current date is attached. The described
        package and every external document reference are added last.

        Example of SPDX document section.
        SPDXVersion: SPDX-2.1
        DataLicense: CC0-1.0
        SPDXID: SPDXRef-DOCUMENT
        DocumentName: mbed-targets
        DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-3c4714e6-a7b1-4574-abb8-861149cbc590
        Creator: Person: Anonymous ()
        Creator: Organization: Anonymous ()
        Creator: Tool: reuse-0.8.0
        Created: 2020-01-20T17:53:41Z
        CreatorComment: <text>
        This document was created automatically using available reuse information consistent with REUSE.
        </text>

        Returns:
            the populated document
        """
        # NOTE(review): the sample above advertises SPDX-2.1 whereas the code
        #  below sets Version(1, 2) - confirm which one is intended.
        document = Document()

        # Document header.
        document.version = Version(1, 2)
        document.name = determine_spdx_value(self.document_name)
        document.namespace = determine_spdx_value(self.document_namespace)
        document.spdx_id = "SPDXRef-DOCUMENT"
        creator_comment = (
            "This document was created automatically using available information from python packages."
        )
        document.comment = determine_spdx_value(creator_comment)
        document.data_license = License.from_identifier("CC0-1.0")

        # Creation information; creators are registered in the order
        # person, organisation (main package only), tool.
        creation_info = document.creation_info
        author = Person(self.author, self.author_email)
        creation_info.add_creator(author)
        describes_main_package = not self._is_dependency
        if describes_main_package:
            organisation = Organization(
                self.organisation, self.organisation_email)
            creation_info.add_creator(organisation)
        creation_info.add_creator(Tool(self.tool_name))
        creation_info.set_created_now()

        # Only the main package carries a review entry.
        if describes_main_package:
            reviewer_name = determine_spdx_value(self.reviewer)
            reviewer_email = determine_spdx_value(self.reviewer_email)
            review = Review(Person(reviewer_name, reviewer_email))
            review.set_review_date_now()
            document.add_review(review)

        # FIXME with current tooling and specification, only one package can
        #  be described in a file and hence, all dependencies are described
        #  in separate files. Find out what to do with dependencies when new
        #  tools are released as it is not entirely clear in the specification
        package_description = self.generate_spdx_package()
        document.package = package_description.generate_spdx_package()

        for reference in self.external_refs:
            spdx_reference = reference.generate_external_reference()
            document.add_ext_document_reference(spdx_reference)
        return document
Example #2
0
    package.download_location = 'http://www.tagwritetest.test/download'
    package.homepage = SPDXNone()
    package.verif_code = '4e3211c67a2d28fced849ee1bb76e7391b93feba'
    license_set = LicenseConjuction(License.from_identifier('Apache-2.0'),
                                    License.from_identifier('BSD-2-Clause'))
    package.conc_lics = license_set
    package.license_declared = license_set
    package.add_lics_from_file(License.from_identifier('Apache-2.0'))
    package.add_lics_from_file(License.from_identifier('BSD-2-Clause'))
    package.cr_text = NoAssert()
    package.summary = 'Simple package.'
    package.description = 'Really simple package.'
    package.add_file(testfile1)
    package.add_file(testfile2)

    doc.package = package

    # An extracted license

    lic = ExtractedLicense('LicenseRef-1')
    lic.text = 'Some non legal legal text..'
    doc.add_extr_lic(lic)

    file = sys.argv[1]
    with codecs.open(file, mode='w', encoding='utf-8') as out:
        try:
            write_document(doc, out)
        except InvalidDocumentError:
            print 'Document is Invalid'
            messages = []
            doc.validate(messages)
Example #3
0
def write_formatted_output(scanners, files_count, version, notice,
                           scanned_files, format, options, input, output_file,
                           _echo, _save):
    """
    Save scan results to file or screen.

    The `format` argument selects the serialization:

    - 'html': write each chunk produced by `as_template` to `output_file`.
    - 'html-app': write the HTML app page, then create its assets.
    - 'json' / 'json-pp': write a compact or pretty-printed JSON document
      holding the notice, version, options, file count and scanned files.
    - 'spdx-tv' / 'spdx-rdf': build an SPDX document with a single package
      named after the input directory and write it as tag/value or RDF.

    `scanners` is accepted for interface compatibility but is not used here.
    `_echo` and `_save` are callables used to report write failures and
    warnings: `_echo(message, fg=color)` and `_save(message, output_file)`.

    Raise an Exception with the message 'Unknown format' for any other
    `format` value. An exception raised while writing an HTML chunk is
    re-raised with extra context appended to its `args`.
    """

    if format == 'html':
        for template_chunk in as_template(scanned_files, files_count,
                                          output_file):
            try:
                output_file.write(template_chunk)
            except Exception as e:
                extra_context = 'ERROR: Failed to write output to HTML for: ' + repr(
                    template_chunk)
                _echo(extra_context, fg='red')
                _save(extra_context + '\n', output_file)
                e.args += (extra_context, )
                # A bare raise re-raises the active exception and keeps the
                # original traceback ("raise e" resets it on Python 2).
                raise

    elif format == 'html-app':
        output_file.write(as_html_app(input, output_file))
        try:
            create_html_app_assets(scanned_files, output_file)
        except HtmlAppAssetCopyWarning:
            _echo('\nHTML app creation skipped when printing to stdout.',
                  fg='yellow')
            _save('\nHTML app creation skipped when printing to stdout.',
                  output_file)
        except HtmlAppAssetCopyError:
            _echo('\nFailed to create HTML app.', fg='red')
            _save('\nFailed to create HTML app.', output_file)

    elif format in ('json', 'json-pp'):
        import simplejson as json

        meta = OrderedDict()
        meta['scancode_notice'] = notice
        meta['scancode_version'] = version
        meta['scancode_options'] = options
        meta['files_count'] = files_count
        meta['files'] = scanned_files

        # Both flavours share every option but the layout ones.
        dump_options = dict(iterable_as_array=True, encoding='utf-8')
        if format == 'json-pp':
            dump_options['indent'] = 2 * ' '
        else:
            dump_options['separators'] = (',', ':')

        output_file.write(unicode(json.dumps(meta, **dump_options)))
        output_file.write('\n')

    elif format in ('spdx-tv', 'spdx-rdf'):
        from spdx.checksum import Algorithm
        from spdx.creationinfo import Tool
        from spdx.document import Document, License
        from spdx.file import File
        from spdx.package import Package
        from spdx.utils import NoAssert
        from spdx.utils import SPDXNone
        from spdx.version import Version

        abs_input = abspath(input)

        # The package is the scanned directory, or the directory holding
        # the scanned file.
        if os.path.isdir(abs_input):
            input_path = abs_input
        else:
            input_path = os.path.dirname(abs_input)

        doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))

        doc.creation_info.add_creator(Tool('ScanCode ' + version))
        doc.creation_info.set_created_now()

        doc.package = Package(os.path.basename(input_path), NoAssert())

        # Use a set of unique copyrights for the package.
        doc.package.cr_text = set()

        all_files_have_no_license = True
        all_files_have_no_copyright = True

        for file_data in scanned_files:
            # Construct the absolute path in case we need to access the file
            # to calculate its SHA1.
            file_entry = File(os.path.join(input_path, file_data.get('path')))

            file_sha1 = file_data.get('sha1')
            if not file_sha1:
                if os.path.isfile(file_entry.name):
                    # Calculate the SHA1 in case it is missing, e.g. for empty files.
                    file_sha1 = file_entry.calc_chksum()
                else:
                    # Skip directories.
                    continue

            # Restore the relative file name as that is what we want in
            # SPDX output (with explicit leading './').
            file_entry.name = './' + file_data.get('path')
            file_entry.chk_sum = Algorithm('SHA1', file_sha1)

            file_licenses = file_data.get('licenses')
            if file_licenses:
                all_files_have_no_license = False
                for file_license in file_licenses:
                    spdx_id = file_license.get('spdx_license_key')
                    if spdx_id:
                        spdx_license = License.from_identifier(spdx_id)
                    else:
                        license_key = 'LicenseRef-' + file_license.get('key')
                        spdx_license = License(file_license.get('short_name'),
                                               license_key)

                    # Add licenses in the order they appear in the file. Maintaining the order
                    # might be useful for provenance purposes.
                    file_entry.add_lics(spdx_license)
                    doc.package.add_lics_from_file(spdx_license)
            elif file_licenses is None:
                # A missing 'licenses' entry yields NOASSERTION, as opposed
                # to an empty one which yields NONE.
                all_files_have_no_license = False
                file_entry.add_lics(NoAssert())
            else:
                file_entry.add_lics(SPDXNone())

            file_entry.conc_lics = NoAssert()

            file_copyrights = file_data.get('copyrights')
            if file_copyrights:
                all_files_have_no_copyright = False
                file_entry.copyright = []
                for file_copyright in file_copyrights:
                    file_entry.copyright.extend(
                        file_copyright.get('statements'))

                doc.package.cr_text.update(file_entry.copyright)

                # Create a text of copyright statements in the order they appear in the file.
                # Maintaining the order might be useful for provenance purposes.
                file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'
            elif file_copyrights is None:
                # Same NOASSERTION / NONE distinction as for licenses.
                all_files_have_no_copyright = False
                file_entry.copyright = NoAssert()
            else:
                file_entry.copyright = SPDXNone()

            doc.package.add_file(file_entry)

        if len(doc.package.files) == 0:
            # Nothing to describe: emit a format-appropriate comment only.
            if format == 'spdx-tv':
                output_file.write("# No results for package '{}'.\n".format(
                    doc.package.name))
            else:
                output_file.write(
                    "<!-- No results for package '{}'. -->\n".format(
                        doc.package.name))
            return

        # Remove duplicate licenses from the list for the package.
        unique_licenses = set(doc.package.licenses_from_files)
        if len(doc.package.licenses_from_files) == 0:
            if all_files_have_no_license:
                doc.package.licenses_from_files = [SPDXNone()]
            else:
                doc.package.licenses_from_files = [NoAssert()]
        else:
            # List license identifiers alphabetically for the package.
            doc.package.licenses_from_files = sorted(
                unique_licenses, key=lambda x: x.identifier)

        if len(doc.package.cr_text) == 0:
            if all_files_have_no_copyright:
                doc.package.cr_text = SPDXNone()
            else:
                doc.package.cr_text = NoAssert()
        else:
            # Create a text of alphabetically sorted copyright statements for the package.
            doc.package.cr_text = '\n'.join(sorted(doc.package.cr_text)) + '\n'

        doc.package.verif_code = doc.package.calc_verif_code()
        doc.package.license_declared = NoAssert()
        doc.package.conc_lics = NoAssert()

        # As the spdx-tools package can only write the document to a "str" file but ScanCode provides a "unicode" file,
        # write to a "str" buffer first and then manually write the value to a "unicode" file.
        from StringIO import StringIO

        str_buffer = StringIO()

        if format == 'spdx-tv':
            from spdx.writers.tagvalue import write_document
        else:
            from spdx.writers.rdf import write_document
        write_document(doc, str_buffer)

        spdx_text = str_buffer.getvalue()
        # Decode explicitly as UTF-8: handing a byte string holding
        # non-ASCII characters to a "unicode" file would otherwise go
        # through an implicit ASCII decoding and fail.
        if isinstance(spdx_text, bytes):
            spdx_text = spdx_text.decode('utf-8')
        output_file.write(spdx_text)

    else:
        raise Exception('Unknown format')
Example #4
0
    package.download_location = 'http://www.tagwritetest.test/download'
    package.homepage = SPDXNone()
    package.verif_code = '4e3211c67a2d28fced849ee1bb76e7391b93feba'
    license_set = LicenseConjuction(License.from_identifier('Apache-2.0'),
        License.from_identifier('BSD-2-Clause'))
    package.conc_lics = license_set
    package.license_declared = license_set
    package.add_lics_from_file(License.from_identifier('Apache-2.0'))
    package.add_lics_from_file(License.from_identifier('BSD-2-Clause'))
    package.cr_text = NoAssert()
    package.summary = 'Simple package.'
    package.description = 'Really simple package.'
    package.add_file(testfile1)
    package.add_file(testfile2)

    doc.package = package

    # An extracted license

    lic = ExtractedLicense('LicenseRef-1')
    lic.text = 'Some non legal legal text..'
    doc.add_extr_lic(lic)

    file = sys.argv[1]
    with codecs.open(file, mode='w', encoding='utf-8') as out:
        try:
            write_document(doc, out)
        except InvalidDocumentError:
            print 'Document is Invalid'
            messages = []
            doc.validate(messages)
Example #5
0
def write_spdx(version, notice, scanned_files, format, input, output_file):
    """
    Write the scan results as an SPDX document to `output_file`.

    `format` is either 'spdx-tv' (tag/value) or 'spdx-rdf' (RDF). The
    document describes a single package named after the scanned directory
    (or the directory holding the scanned file `input`), carries `notice`
    as its comment and records 'ScanCode ' + `version` as the creating tool.
    Each entry of `scanned_files` is read through `.get()` for its 'path',
    'sha1', 'licenses' and 'copyrights' keys.

    When no file entry is retained, only a format-appropriate
    "No results" comment is written and no document is produced.

    Raise a ValueError when `format` is not one of the two supported values.
    """
    # Fail fast: without this check an unsupported format would only surface
    # at the very end as an unbound `write_document` name.
    if format not in ('spdx-tv', 'spdx-rdf'):
        raise ValueError('Unknown SPDX format: %r' % (format,))

    from spdx.checksum import Algorithm
    from spdx.creationinfo import Tool
    from spdx.document import Document
    from spdx.document import License
    from spdx.document import ExtractedLicense
    from spdx.file import File
    from spdx.package import Package
    from spdx.utils import NoAssert
    from spdx.utils import SPDXNone
    from spdx.version import Version

    absinput = abspath(input)

    if os.path.isdir(absinput):
        input_path = absinput
    else:
        input_path = os.path.dirname(absinput)

    doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
    doc.comment = notice

    doc.creation_info.add_creator(Tool('ScanCode ' + version))
    doc.creation_info.set_created_now()

    doc.package = Package(os.path.basename(input_path), NoAssert())

    # Use a set of unique copyrights for the package.
    doc.package.cr_text = set()

    all_files_have_no_license = True
    all_files_have_no_copyright = True

    for file_data in scanned_files:
        # Construct the absolute path in case we need to access the file
        # to calculate its SHA1.
        file_entry = File(os.path.join(input_path, file_data.get('path')))

        file_sha1 = file_data.get('sha1')
        if not file_sha1:
            if os.path.isfile(file_entry.name):
                # Calculate the SHA1 in case it is missing, e.g. for empty files.
                file_sha1 = file_entry.calc_chksum()
            else:
                # Skip directories.
                continue

        # Restore the relative file name as that is what we want in
        # SPDX output (with explicit leading './').
        file_entry.name = './' + file_data.get('path')
        file_entry.chk_sum = Algorithm('SHA1', file_sha1)

        file_licenses = file_data.get('licenses')
        if file_licenses:
            all_files_have_no_license = False
            for file_license in file_licenses:
                spdx_id = file_license.get('spdx_license_key')
                if spdx_id:
                    spdx_license = License.from_identifier(spdx_id)
                else:
                    license_key = file_license.get('key')
                    # FIXME: we should prefix this with ScanCode-
                    licenseref_id = 'LicenseRef-' + license_key
                    spdx_license = ExtractedLicense(licenseref_id)
                    spdx_license.name = file_license.get('short_name')
                    comment = 'See details at https://github.com/nexB/scancode-toolkit/blob/develop/src/licensedcode/data/licenses/%s.yml\n' % license_key
                    spdx_license.comment = comment
                    text = file_license.get('matched_text')
                    # always set some text, even if we did not extract the matched text
                    if not text:
                        text = comment
                    spdx_license.text = text
                    doc.add_extr_lic(spdx_license)

                # Add licenses in the order they appear in the file. Maintaining the order
                # might be useful for provenance purposes.
                file_entry.add_lics(spdx_license)
                doc.package.add_lics_from_file(spdx_license)

        elif file_licenses is None:
            all_files_have_no_license = False
            file_entry.add_lics(NoAssert())

        else:
            file_entry.add_lics(SPDXNone())

        file_entry.conc_lics = NoAssert()

        file_copyrights = file_data.get('copyrights')
        if file_copyrights:
            all_files_have_no_copyright = False
            file_entry.copyright = []
            for file_copyright in file_copyrights:
                file_entry.copyright.extend(file_copyright.get('statements'))

            doc.package.cr_text.update(file_entry.copyright)

            # Create a text of copyright statements in the order they appear in the file.
            # Maintaining the order might be useful for provenance purposes.
            file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'

        elif file_copyrights is None:
            all_files_have_no_copyright = False
            file_entry.copyright = NoAssert()

        else:
            file_entry.copyright = SPDXNone()

        doc.package.add_file(file_entry)

    if not doc.package.files:
        if format == 'spdx-tv':
            output_file.write("# No results for package '{}'.\n".format(
                doc.package.name))
        else:
            output_file.write("<!-- No results for package '{}'. -->\n".format(
                doc.package.name))
        # Stop here: there is no file to describe, so do not go on to
        # compute a verification code and write a validated document for
        # an empty package.
        return

    # Remove duplicate licenses from the list for the package.
    unique_licenses = set(doc.package.licenses_from_files)
    if not len(doc.package.licenses_from_files):
        if all_files_have_no_license:
            doc.package.licenses_from_files = [SPDXNone()]
        else:
            doc.package.licenses_from_files = [NoAssert()]
    else:
        # List license identifiers alphabetically for the package.
        doc.package.licenses_from_files = sorted(unique_licenses,
                                                 key=lambda x: x.identifier)

    if len(doc.package.cr_text) == 0:
        if all_files_have_no_copyright:
            doc.package.cr_text = SPDXNone()
        else:
            doc.package.cr_text = NoAssert()
    else:
        # Create a text of alphabetically sorted copyright
        # statements for the package.
        doc.package.cr_text = '\n'.join(sorted(doc.package.cr_text)) + '\n'

    doc.package.verif_code = doc.package.calc_verif_code()
    doc.package.license_declared = NoAssert()
    doc.package.conc_lics = NoAssert()

    # The format was validated on entry, so anything but tag/value is RDF.
    if format == 'spdx-tv':
        from spdx.writers.tagvalue import write_document
    else:
        from spdx.writers.rdf import write_document

    # As the spdx-tools package can only write the document to a
    # "str" file but ScanCode provides a "unicode" file, write to a
    # "str" buffer first and then manually write the value to a
    # "unicode" file.
    from StringIO import StringIO
    str_buffer = StringIO()
    write_document(doc, str_buffer, validate=True)
    spdx_text = str_buffer.getvalue()
    # Only decode byte strings: decoding a value that is already unicode
    # would first trigger an implicit ASCII encoding.
    if isinstance(spdx_text, bytes):
        spdx_text = spdx_text.decode('utf-8')
    output_file.write(spdx_text)