Exemple #1
0
def check_json_scan(
    expected_file,
    result_file,
    regen=False,
    remove_file_date=False,
    check_headers=False,
):
    """
    Compare the JSON scan results stored in `result_file` with the expected
    JSON results stored in `expected_file` and fail with an assertion error
    if they differ.

    If `regen` is True, `expected_file` IS OVERWRITTEN first with the data
    loaded from `result_file`. Handy to refresh test expectations, but use
    with caution.

    `remove_file_date` is passed as-is to `load_json_result` for both files:
    if True, the file.date attribute is removed.
    Unless `check_headers` is True, the top-level 'headers' entry is dropped
    from both sides before they are compared.
    """
    actual = load_json_result(result_file, remove_file_date)

    if regen:
        with open(expected_file, 'w') as regenerated:
            json.dump(actual, regenerated, indent=2, separators=(',', ': '))

    wanted = load_json_result(expected_file, remove_file_date)

    if not check_headers:
        for scan in (actual, wanted):
            scan.pop('headers', None)

    if actual == wanted:
        return

    # Both sides are dumped again as YAML text so that the failing assertion
    # is reported as an easy to read text diff.
    wanted_yaml = saneyaml.dump(wanted)
    actual_yaml = saneyaml.dump(actual)
    assert actual_yaml == wanted_yaml
Exemple #2
0
def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    The rule text file is scanned with `api.get_copyrights`, `api.get_urls`
    and `api.get_emails`. The detected values, stripped of their line
    numbers, are compared with the `ignorable_*` attributes of the rule.
    Empty entries are ignored on both sides.

    Raise an AssertionError if the detected and declared ignorables differ.
    """
    from itertools import chain
    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    results = OrderedDict()
    for what, detections in scan_data.items():
        # remove lines
        for detected in detections:
            detected.pop('start_line', None)
            detected.pop('end_line', None)

        # remove keys and keep only the unique sorted values e.g. a list of
        # detected copyrights, emails, etc
        values = chain.from_iterable(
            detected.values() for detected in detections)
        results['ignorable_' + what] = sorted(set(values))

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    # keep only non-empty entries, sorted by key
    results = OrderedDict((k, v) for k, v in sorted(results.items()) if v)
    expected = OrderedDict((k, v) for k, v in sorted(expected.items()) if v)

    try:
        assert expected == results
    except AssertionError:
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path. Only AssertionError is handled here
        # so that interrupts and unrelated errors propagate untouched.

        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')
        results['files'] = [
            'file://{}'.format(data_file),
            'file://{}'.format(text_file),
        ]
        # this assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)
def check_ignorable_clues(licensish, regen=REGEN_TEST_FIXTURES, verbose=False):
    """
    Validate that all expected ignorable clues declared in a `licensish` License
    or Rule object are properly detected in that rule text file. Optionally
    ``regen`` the ignorables to update the License or Rule .yml data file.
    Print the detected and expected ignorables if ``verbose`` is True.

    Raise an AssertionError if the detected and expected ignorables differ.
    """
    result = models.get_ignorables(text_file=licensish.text_file)

    if verbose:
        print()
        print('result')
        pprint(result)

    if regen:
        is_from_license = licensish.is_from_license
        if is_from_license:
            # Update the object fetched from the licenses db under this
            # license expression rather than the licensish itself.
            db = cache.get_licenses_db()
            licish = db[licensish.license_expression]
        else:
            licish = licensish
        models.set_ignorables(licish, result, verbose=verbose)
        licish.dump()
        if is_from_license:
            # Rebuild the rule from the updated license object.
            licensish = models.build_rule_from_license(licish)

    expected = models.get_normalized_ignorables(licensish)

    if verbose:
        print('expected')
        pprint(expected)

    try:
        assert result == expected
    except AssertionError:
        # On failure, we compare again to get additional failure details such as
        # a clickable text_file path. Only AssertionError is handled here so
        # that interrupts and unrelated errors propagate untouched.

        data_file = licensish.data_file
        if not data_file:
            data_file = licensish.text_file.replace('.LICENSE', '.yml')

        result['files'] = [
            f'file://{data_file}',
            f'file://{licensish.text_file}',
        ]

        # This assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(result) == saneyaml.dump(expected)
Exemple #4
0
def dependency_mapper(dependencies, scope='dependencies'):
    """
    Yield DependentPackage collected from a `dependencies` mapping of cargo
    dependency name -> requirement, all reported with the provided `scope`.

    A requirement that is a string is used as-is as the extracted
    requirement. A requirement that is a mapping is dumped as YAML text,
    without its 'optional' entry which is used as the is_optional flag.
    Dependencies are flagged as runtime unless `scope` ends with
    'dev-dependencies' or 'build-dependencies'.

    The `dependencies` mapping and its values are not modified.
    """
    is_runtime = not scope.endswith(('dev-dependencies', 'build-dependencies'))
    for name, requirement in dependencies.items():
        # Reset for each dependency: a plain version requirement, or a
        # requirement of any other type, is never optional. This also avoids
        # reusing the flag of the previous dependency.
        is_optional = False
        if isinstance(requirement, dict):
            # complex requirement, with more than version are harder to handle
            # so we just dump. Work on a copy so that removing 'optional'
            # does not alter the data of the caller.
            requirement = dict(requirement)
            is_optional = requirement.pop('optional', False)
            requirement = saneyaml.dump(requirement)

        yield models.DependentPackage(
            purl=PackageURL(
                type='cargo',
                name=name,
            ).to_string(),
            extracted_requirement=requirement,
            scope=scope,
            is_runtime=is_runtime,
            is_optional=is_optional,
            is_resolved=False,
        )
Exemple #5
0
def generate_license_tests(location):
    """
    Write a license test text file and a companion `.yml` test data file for
    each test collected from `location` by `collect_tests` and
    `collect_url_tests`.

    The license key of each test is lowercased and mapped to a license key
    using the SPDX symbols and `extra_license_keys`; 'unknown' is used when
    there is no mapping. Files are written as UTF-8 text.
    """
    # map their keys to ours
    license_mapping = {
        spdx: lic.key for spdx, lic in get_spdx_symbols().items()
    }
    license_mapping.update(extra_license_keys)

    # Both collections are fully materialized before any file is written.
    tests = list(collect_tests(location)) + list(collect_url_tests(location))
    for test in tests:
        loc = test.location

        print(f'Processing: {loc}')

        # Explicit encoding so that the written text does not depend on the
        # platform default.
        with open(loc, 'w', encoding='utf-8') as txt:
            txt.write(test.text)

        original_key = test.license_key
        lookup_key = original_key.lower() if original_key else None
        lickey = license_mapping.get(lookup_key) or 'unknown'

        url = f'https://raw.githubusercontent.com/google/licensecheck/v0.3.1/testdata/{test.filename}'
        notes = (
            f'License test derived from a file of the BSD-licensed repository at:\n'
            f'{url}\n'
            f'originally expected to be detected as {test.license_key}\n'
            f'with coverage of {test.coverage}\n'
        ) + (test.notes or '')
        data = dict(license_expressions=[lickey], notes=notes)

        # Serialize before opening the data file: opening in 'w' mode
        # truncates it, and a failed dump should not leave it empty.
        as_yaml = saneyaml.dump(data)
        with open(loc + '.yml', 'w', encoding='utf-8') as td:
            td.write(as_yaml)
Exemple #6
0
 def dump(self):
     """
     Serialize the mapping returned by `self.to_dict()` as YAML and save it
     as UTF-8 text in the file at `self.data_file`.
     """
     # The YAML text is built before the file is opened for writing.
     yaml_text = saneyaml.dump(self.to_dict())
     with io.open(self.data_file, 'w', encoding='utf-8') as output:
         output.write(yaml_text)
def check_cyclone_output(expected_file, result_file, regen=False):
    """
    Compare the data of `result_file` with the data of `expected_file`, both
    loaded with `load_and_clean_json`, and fail with an assertion error if
    they differ. Any headers cleanup is left to `load_and_clean_json`.

    If `regen` is True, `expected_file` is overwritten first with the data
    loaded from `result_file`.
    """
    actual = load_and_clean_json(result_file)

    if regen:
        with open(expected_file, 'w') as regenerated:
            json.dump(actual, regenerated, indent=2, separators=(',', ': '))

    wanted = load_and_clean_json(expected_file)
    if actual == wanted:
        return

    # Both sides are dumped again as YAML text so that the failing assertion
    # is reported as an easy to read text diff.
    wanted_yaml = saneyaml.dump(wanted)
    actual_yaml = saneyaml.dump(actual)
    assert actual_yaml == wanted_yaml
def check_result_equals_expected_json(result, expected_loc, regen=False):
    """
    Check equality between a `result` collection and the data in an
    `expected_loc` JSON file.

    If `regen` is True, the expected file is (re)written with `result` first,
    creating its parent directory as needed. Otherwise the expected data is
    loaded from `expected_loc`.

    Raise an AssertionError if `result` and the expected data differ.
    """
    if regen:
        expected = result

        # dirname is empty when expected_loc is a bare file name: there is
        # nothing to create in this case and os.makedirs('') would fail.
        expected_dir = os.path.dirname(expected_loc)
        if expected_dir:
            os.makedirs(expected_dir, exist_ok=True)

        with open(expected_loc, 'w') as ex:
            json.dump(expected, ex, indent=2, separators=(',', ': '))
    else:
        with open(expected_loc) as ex:
            expected = json.load(ex)

    # NOTE we compare YAML dumps on failure for an easier to read diff
    if result != expected:
        assert saneyaml.dump(result) == saneyaml.dump(expected)
Exemple #9
0
def license_details_view(request, key):
    """
    Display all the information available about the provided license `key`
    followed by the full license text.

    Return an HttpResponse with the license data dumped as YAML and the
    license text, each in its own <pre> block, or with a "not found" message
    if there is no license for `key`. All interpolated values are
    HTML-escaped.
    """
    from html import escape

    licenses = get_licenses()
    # Only the lookup is guarded so that a KeyError raised elsewhere is not
    # misreported as a missing license.
    try:
        lic = licenses[key]
    except KeyError:
        # `key` comes from the request: escape it before echoing it back.
        return HttpResponse(f"License {escape(str(key))} not found.")

    # License texts commonly contain markup-like placeholders such as <year>:
    # escape them so they are displayed and not interpreted as HTML.
    data = escape(saneyaml.dump(lic.to_dict()))
    text = escape(str(lic.text))
    return HttpResponse(f"<pre>{data}</pre><hr><pre>{text}</pre>")
Exemple #10
0
def check_ignorable_clues(licensish, regen=False, verbose=False):
    """
    Validate that all expected ignorable clues declared in a `licensish` License
    or Rule object are properly detected in that rule text file. Optionally
    regen the ignorables and updates the License or Rule .yml data file.
    Print the detected and expected ignorables if `verbose` is True.

    Raise an AssertionError if the detected and expected ignorables differ.
    """
    result = models.get_ignorables(text_file=licensish.text_file)

    if verbose:
        print()
        print('result')
        pprint(result)

    if regen:
        # Store the detected ignorables on the licensish and save it.
        models.set_ignorables(licensish, result, verbose=verbose)
        licensish.dump()

    expected = models.get_normalized_ignorables(licensish)

    if verbose:
        print('expected')
        pprint(expected)

    try:
        assert result == expected
    except AssertionError:
        # On failure, we compare again to get additional failure details such as
        # a clickable text_file path. Only AssertionError is handled here so
        # that interrupts and unrelated errors propagate untouched.

        data_file = licensish.data_file
        if not data_file:
            data_file = licensish.text_file.replace('.LICENSE', '.yml')

        result['files'] = [
            f'file://{data_file}',
            f'file://{licensish.text_file}',
        ]

        # This assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(result) == saneyaml.dump(expected)
Exemple #11
0
def generate_details(output_path):
    """
    Write four files to `output_path` for each license of `licenses`, all
    named after the license key: the rendered "license_details.html"
    template (.html), the license data as YAML (.yml) and as JSON (.json),
    and the license text (.LICENSE).
    """
    template = env.get_template("license_details.html")
    for lic in licenses.values():
        data = lic.to_dict()
        page = template.render(**base_context, license=lic, license_data=data)
        write_file(output_path, f"{lic.key}.html", page)

        as_yaml = saneyaml.dump(data)
        write_file(output_path, f"{lic.key}.yml", as_yaml)

        as_json = json.dumps(data)
        write_file(output_path, f"{lic.key}.json", as_json)

        write_file(output_path, f"{lic.key}.LICENSE", lic.text)
Exemple #12
0
def check_yaml_scan(expected_file, result_file, regen=False):
    """
    Check the scan `result_file` YAML results against the `expected_file`
    expected YAML results. The 'headers' of both are ignored.

    If `regen` is True the expected_file WILL BE overwritten with the new scan
    results from `results_file`. This is convenient for updating tests
    expectations. But use with caution.

    Raise an AssertionError if the results and expected results differ.
    """
    results = load_yaml_results(result_file) or {}
    if regen:
        with open(expected_file, 'w') as reg:
            reg.write(saneyaml.dump(results))

    # Same fallback as for the results above: with a falsy loaded value, a
    # mismatch is reported by the assertion below rather than as an
    # AttributeError on the pop() call.
    expected = load_yaml_results(expected_file) or {}

    results.pop('headers', None)
    expected.pop('headers', None)

    # NOTE we redump the YAML as a string for a more efficient display of the
    # failures comparison/diff
    expected = saneyaml.dump(expected)
    results = saneyaml.dump(results)
    assert expected == results
Exemple #13
0
def pretty(data):
    """
    Return a pretty text representation of `data` for display:

    - None if `data` is falsy,
    - the stripped string if `data` is a list or tuple holding one string,
    - the stripped YAML dump if `data` is any other list, tuple or mapping,
    - `data` itself otherwise.
    """
    if not data:
        return None

    sequences = (list, tuple)
    mappings = (OrderedDict, dict)

    is_sequence = isinstance(data, sequences)
    if is_sequence and len(data) == 1 and isinstance(data[0], string_types):
        return data[0].strip()

    if not (is_sequence or isinstance(data, mappings)):
        return data

    # The dump is requested as UTF-8 encoded bytes and decoded back to text.
    dumped = saneyaml.dump(data, indent=2, encoding='utf-8')
    return dumped.decode('utf-8').strip()
Exemple #14
0
def check_expected_parse_copyright_file(
    test_loc,
    expected_loc,
    regen=False,
    with_details=False,
):
    """
    Parse the copyright file at `test_loc` and compare the YAML dump of the
    parsed items with the text of the expected file at `expected_loc`. Fail
    with an assertion error if they differ.

    If `regen` is True, the expected file is overwritten first with the new
    dump. `with_details` switches on or off together the
    `skip_debian_packaging`, `simplify_licenses` and `unique` parsing options.
    """
    # A single flag drives all three parsing options.
    detailed = bool(with_details)

    parsed = debian_copyright.parse_copyright_file(
        copyright_file=test_loc,
        skip_debian_packaging=detailed,
        simplify_licenses=detailed,
        unique=detailed,
    )
    result = saneyaml.dump(list(parsed))

    if regen:
        with io.open(expected_loc, 'w', encoding='utf-8') as regenerated:
            regenerated.write(result)

    with io.open(expected_loc, encoding='utf-8') as expected_file:
        expected = expected_file.read()

    if result == expected:
        return

    # Prefix the expected text with file URLs of the test and expected files
    # so that they show up in the report of the failing assertion below.
    expected = '\n'.join(
        ['file://' + test_loc, 'file://' + expected_loc, expected])
    assert result == expected
Exemple #15
0
    def closure_test_function(self):
        # Load the YAML test file, then dump the loaded data back to YAML.
        with io.open(test_file, encoding='utf-8') as yaml_file:
            loaded = saneyaml.load(yaml_file.read())
        dumped = saneyaml.dump(loaded)

        if regen:
            # Refresh both expectations: the loaded data as JSON and the
            # dumped YAML text.
            with io.open(expected_load_file, 'w', encoding='utf-8') as load_out:
                json.dump(loaded, load_out, indent=2)

            with io.open(expected_dump_file, 'w', encoding='utf-8') as dump_out:
                dump_out.write(dumped)

        with io.open(expected_load_file, encoding='utf-8') as load_in:
            expected_load = json.load(load_in)

        with io.open(expected_dump_file, encoding='utf-8') as dump_in:
            expected_dump = dump_in.read()

        assert expected_load == loaded
        assert expected_dump == dumped
Exemple #16
0
def check_expected(test_loc, expected_loc, regen=False):
    """
    Parse the copyright file at `test_loc` and compare the YAML dump of the
    parsed items with the text of the expected file at `expected_loc`. Fail
    with an assertion error if they differ.

    If `regen` is True, the expected file is overwritten first with the new
    dump.
    """
    parsed = debian_copyright.parse_copyright_file(test_loc)
    result = saneyaml.dump(list(parsed))

    if regen:
        with io.open(expected_loc, 'w', encoding='utf-8') as regenerated:
            regenerated.write(result)

    with io.open(expected_loc, encoding='utf-8') as expected_file:
        expected = expected_file.read()

    if expected == result:
        return

    # Prefix the expected text with file URLs of the test and expected files
    # so that they show up in the report of the failing assertion below.
    expected = '\n'.join([
        'file://' + test_loc,
        'file://' + expected_loc,
        expected,
    ])
    assert expected == result
Exemple #17
0
def generate_indexes(output_path):
    """
    Write three index files to `output_path`: "index.html" rendered from the
    "license_list.html" template, and "index.json" and "index.yml" that both
    hold one entry per license of `licenses` with its keys, its flags and the
    names of its details files.
    """
    template = env.get_template("license_list.html")
    write_file(
        output_path,
        "index.html",
        template.render(**base_context, licenses=licenses),
    )

    index = []
    for key, lic in licenses.items():
        entry = {
            "license_key": key,
            "spdx_license_key": lic.spdx_license_key,
            "other_spdx_license_keys": lic.other_spdx_license_keys,
            "is_exception": lic.is_exception,
            "is_deprecated": lic.is_deprecated,
            "json": f"{key}.json",
            "yml": f"{key}.yml",
            "html": f"{key}.html",
            "text": f"{key}.LICENSE",
        }
        index.append(entry)

    write_file(output_path, "index.json", json.dumps(index))
    write_file(output_path, "index.yml", saneyaml.dump(index))
Exemple #18
0
    def get_context_data(self, **kwargs):
        """
        Return the parent context extended with the project inputs, summaries
        of its file and package data, the selected file filter, the forms to
        add a pipeline and inputs, and a YAML dump of the project extra data
        if there is any. Missing input files are reported with an error
        message on the request.
        """
        context = super().get_context_data(**kwargs)
        project = self.object

        # Narrow the file resources according to the "file-filter" query
        # parameter; any other value keeps all the files.
        resources = project.codebaseresources.files()
        file_filter = self.request.GET.get("file-filter", "all")
        if file_filter == "in-a-package":
            resources = resources.in_package()
        elif file_filter == "not-in-a-package":
            resources = resources.not_in_package()

        resources = resources.only(
            "programming_language",
            "mime_type",
            "holders",
            "copyrights",
            "license_expressions",
        )
        discovered = project.discoveredpackages.all().only(
            "type",
            "license_expression",
        )

        languages = resources.values_list("programming_language", flat=True)
        mime_types = resources.values_list("mime_type", flat=True)
        holders = self.data_from_model_field(resources, "holders", "value")
        copyrights = self.data_from_model_field(resources, "copyrights", "value")
        license_keys = self.data_from_model_field(resources, "licenses", "key")
        license_categories = self.data_from_model_field(
            resources, "licenses", "category"
        )

        # Compliance alerts are only collected when policies are enabled.
        if scanpipe_app.policies_enabled:
            compliance_alerts = resources.values_list("compliance_alert", flat=True)
        else:
            compliance_alerts = []

        package_licenses = discovered.values_list("license_expression", flat=True)
        package_types = discovered.values_list("type", flat=True)

        inputs, missing_inputs = project.inputs_with_source
        if missing_inputs:
            missing_names = "\n- ".join(missing_inputs.keys())
            messages.error(
                self.request,
                "The following input files are not available on disk anymore:\n- "
                + missing_names,
            )

        context["inputs_with_source"] = inputs

        # Each summary is stored under its own context key.
        summarized = (
            ("programming_languages", languages),
            ("mime_types", mime_types),
            ("holders", holders),
            ("copyrights", copyrights),
            ("file_license_keys", license_keys),
            ("file_license_categories", license_categories),
            ("file_compliance_alert", compliance_alerts),
            ("package_licenses", package_licenses),
            ("package_types", package_types),
        )
        for name, values in summarized:
            context[name] = self.get_summary(values)

        context["file_filter"] = file_filter
        context["add_pipeline_form"] = AddPipelineForm()
        context["add_inputs_form"] = AddInputsForm()

        extra_data = project.extra_data
        if extra_data:
            context["extra_data_yaml"] = saneyaml.dump(extra_data, indent=2)

        return context
 def dump(self):
     """
     Write a YAML representation of `self.to_dict()` as UTF-8 text to the
     file at `self.data_file`, creating its parent directory first if it
     does not exist.
     """
     # Serialize before opening the file: opening in 'w' mode truncates it,
     # and a failed serialization should not leave an empty data file.
     as_yaml = saneyaml.dump(self.to_dict())

     parent = fileutils.parent_directory(self.data_file)
     if not exists(parent):
         fileutils.create_dir(parent)

     # Explicit encoding so that the written text does not depend on the
     # platform default.
     with open(self.data_file, 'w', encoding='utf-8') as df:
         df.write(as_yaml)
Exemple #20
0
 def dumps(self):
     """
     Serialize the mapping returned by `self.to_dict()` and return it as a
     string in YAML block format.
     """
     data = self.to_dict()
     return saneyaml.dump(data)
Exemple #21
0
def write_yaml(results, output_file, **kwargs):
    """
    Dump `results` as YAML with an indent of 4 and write it to the
    `output_file` opened file-like object, followed by a newline.
    Extra `kwargs` are accepted and not used.
    """
    yaml_text = saneyaml.dump(results, indent=4)
    output_file.write(yaml_text)
    output_file.write('\n')
def check_expected_parse_copyright_file(
    test_loc,
    expected_loc,
    regen=False,
    simplified=False,
    _licensing=Licensing(),
):
    """
    Parse the copyright file at `test_loc` and compare its primary license,
    declared license, license expression, copyright and license match
    details with the data of the expected YAML file at `expected_loc`.

    If `regen` is True, the expected file is overwritten with the new results
    and nothing is compared. Otherwise, fail with an assertion error if the
    results differ from the expected data or if the license expression
    contains the 'unknown-license-reference' key.

    `simplified` switches on or off together the `filter_duplicates`,
    `skip_debian_packaging`, `simplify_licenses` and `unique_copyrights`
    options. Any exception raised while collecting the results or loading
    the expected data is re-raised as an Exception that also carries the
    traceback text and the file URLs of both locations.
    """
    # A single flag drives all four options.
    simplify = bool(simplified)

    try:
        dc = debian_copyright.parse_copyright_file(
            location=test_loc,
            check_consistency=False,
        )

        declared_license = dc.get_declared_license(
            filter_duplicates=simplify,
            skip_debian_packaging=simplify,
        )

        license_expression = dc.get_license_expression(
            skip_debian_packaging=simplify,
            simplify_licenses=simplify,
        )
        license_expression_keys = set(_licensing.license_keys(license_expression))

        copyright_text = dc.get_copyright(
            skip_debian_packaging=simplify,
            unique_copyrights=simplify,
        ).strip()

        results = {
            'primary_license': dc.primary_license,
            'declared_license': declared_license,
            'license_expression': license_expression,
            'copyright': copyright_text,
            'matches': [get_match_details(m) for m in dc.license_matches],
        }

        if regen:
            expected = results
            with open(expected_loc, 'w') as regenerated:
                regenerated.write(saneyaml.dump(results))
        else:
            with open(expected_loc) as expected_file:
                expected = saneyaml.load(expected_file.read())
    except Exception as e:
        import traceback
        files = [
            'file://' + test_loc,
            'file://' + expected_loc,
        ]
        raise Exception(repr(e), traceback.format_exc(), files) from e

    if regen:
        return

    is_same = saneyaml.dump(results) == saneyaml.dump(expected)
    if is_same and 'unknown-license-reference' not in license_expression_keys:
        return

    # Build a report that starts with the file URLs of both locations. The
    # assertion below then fails and displays this report.
    report = {
        'test_loc': f'file://{test_loc}',
        'expected_loc': f'file://{expected_loc}',
    }
    report.update(results)
    report_text = saneyaml.dump(report).replace(
        'unknown-license-reference',
        'unknown-license-reference should not be detected',
    )
    assert report_text == saneyaml.dump(expected)
Exemple #23
0
 def test_dump_does_handles_numbers_and_booleans_correctly(self):
     # Dump a list made of None and of a mapping with an integer key and a
     # float key.
     data = [None, {1: None, 123.34: 'tha'}]
     expected = (
         "-\n"
         "- 1:\n"
         "  '123.34': tha\n"
     )
     dumped = saneyaml.dump(data)
     assert expected == dumped
Exemple #24
0
 def test_dump_increases_indents_correctly(self):
     # Dump lists nested three levels deep under a single mapping key.
     nested = {'a': [1, [2, 3, [4, 5]]]}
     dumped = saneyaml.dump(nested)
     expected = 'a:\n  - 1\n  - - 2\n    - 3\n    - - 4\n      - 5\n'
     assert expected == dumped
Exemple #25
0
    def get_context_data(self, **kwargs):
        """
        Return the parent context extended with the entries found directly
        under the project input path, summaries of its file and package
        data, the selected file filter, the form to add a pipeline, and a
        YAML dump of the project extra data if there is any.
        """
        context = super().get_context_data(**kwargs)
        project = self.object

        # One (relative path, is a file) pair per entry directly under the
        # project input path.
        input_path = project.input_path
        inputs = []
        for path in input_path.glob("*"):
            inputs.append((path.relative_to(input_path), path.is_file()))
        context["inputs"] = inputs

        # Narrow the file resources according to the "file-filter" query
        # parameter; any other value keeps all the files.
        resources = project.codebaseresources.files()
        file_filter = self.request.GET.get("file-filter", "all")
        if file_filter == "in-a-package":
            resources = resources.in_package()
        elif file_filter == "not-in-a-package":
            resources = resources.not_in_package()

        resources = resources.only(
            "programming_language",
            "mime_type",
            "holders",
            "copyrights",
            "license_expressions",
        )
        discovered = project.discoveredpackages.all().only(
            "type",
            "license_expression",
        )

        languages = resources.values_list("programming_language", flat=True)
        mime_types = resources.values_list("mime_type", flat=True)
        holders = self.data_from_model_field(resources, "holders", "value")
        copyrights = self.data_from_model_field(resources, "copyrights", "value")
        license_keys = self.data_from_model_field(resources, "licenses", "key")
        license_categories = self.data_from_model_field(
            resources, "licenses", "category"
        )

        package_licenses = discovered.values_list("license_expression", flat=True)
        package_types = discovered.values_list("type", flat=True)

        # Each summary is stored under its own context key.
        summarized = (
            ("programming_languages", languages),
            ("mime_types", mime_types),
            ("holders", holders),
            ("copyrights", copyrights),
            ("file_license_keys", license_keys),
            ("file_license_categories", license_categories),
            ("package_licenses", package_licenses),
            ("package_types", package_types),
        )
        for name, values in summarized:
            context[name] = self.get_summary(values)

        context["file_filter"] = file_filter
        context["add_pipeline_form"] = AddPipelineForm()

        extra_data = project.extra_data
        if extra_data:
            context["extra_data_yaml"] = saneyaml.dump(extra_data, indent=2)

        return context
Exemple #26
0
 def test_dump_converts_bytes_to_unicode_correctly(self):
     # Dump a mapping whose key and value are both bytes.
     data = {b'a': b'foo'}
     dumped = saneyaml.dump(data)
     expected = 'a: foo\n'
     assert expected == dumped