def check_json_scan(
    expected_file,
    result_file,
    regen=False,
    remove_file_date=False,
    check_headers=False,
):
    """
    Check the scan `result_file` JSON results against the `expected_file`
    expected JSON results.

    If `regen` is True the expected_file WILL BE overwritten with the new scan
    results from `results_file`. This is convenient for updating tests
    expectations. But use with caution.

    If `remove_file_date` is True, the file.date attribute is removed.
    If `check_headers` is True, the scan headers attribute is not removed.
    """
    scanned = load_json_result(result_file, remove_file_date)

    # regenerate the expectation file from the current results when requested
    if regen:
        with open(expected_file, 'w') as out:
            json.dump(scanned, out, indent=2, separators=(',', ': '))

    wanted = load_json_result(expected_file, remove_file_date)

    if not check_headers:
        # headers carry volatile data (dates, tool versions) and are
        # ignored by default
        scanned.pop('headers', None)
        wanted.pop('headers', None)

    # NOTE we redump the JSON as a YAML string for easier display of
    # the failures comparison/diff
    if scanned != wanted:
        assert saneyaml.dump(scanned) == saneyaml.dump(wanted)
def check_ignorable_clues(rule):
    """
    Validate that all ignorable clues defined in a `rule` Rule object are
    properly detected in that rule text file.

    Raise an AssertionError with clickable file:// paths on mismatch.
    """
    from itertools import chain
    from scancode import api

    text_file = rule.text_file

    # scan clues
    scan_data = {}
    scan_data.update(api.get_copyrights(text_file))
    scan_data.update(api.get_urls(text_file, threshold=0))
    scan_data.update(api.get_emails(text_file, threshold=0))

    results = OrderedDict()
    for what, detections in scan_data.items():
        # remove lines
        for detected in detections:
            detected.pop('start_line', None)
            detected.pop('end_line', None)

        # remove keys and keep only values e.g. a list of detected copyrights,
        # emails, etc
        detections = sorted(
            set(chain(*(detected.values() for detected in detections))))
        results['ignorable_' + what] = detections

    # collect ignorables
    expected = OrderedDict([
        ('ignorable_copyrights', rule.ignorable_copyrights or []),
        ('ignorable_holders', rule.ignorable_holders or []),
        ('ignorable_authors', rule.ignorable_authors or []),
        ('ignorable_urls', rule.ignorable_urls or []),
        ('ignorable_emails', rule.ignorable_emails or []),
    ])

    results = OrderedDict([(k, v) for k, v in sorted(results.items()) if v])
    expected = OrderedDict([(k, v) for k, v in sorted(expected.items()) if v])

    try:
        assert expected == results
    # FIX: was a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; only the comparison failure should be caught here.
    except AssertionError:
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path
        data_file = rule.data_file
        if not data_file:
            data_file = text_file.replace('.LICENSE', '.yml')
        results['files'] = [
            'file://{data_file}'.format(**locals()),
            'file://{text_file}'.format(**locals()),
        ]
        # this assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(expected) == saneyaml.dump(results)
def check_ignorable_clues(licensish, regen=REGEN_TEST_FIXTURES, verbose=False):
    """
    Validate that all expected ignorable clues declared in a `licensish`
    License or Rule object are properly detected in that rule text file.
    Optionally ``regen`` the ignorables to update the License or Rule .yml
    data file.
    """
    result = models.get_ignorables(text_file=licensish.text_file)
    if verbose:
        print()
        print('result')
        pprint(result)

    if regen:
        is_from_license = licensish.is_from_license
        if is_from_license:
            # a Rule derived from a License must be regenerated against the
            # original License object from the cache, not the derived Rule
            db = cache.get_licenses_db()
            licish = db[licensish.license_expression]
        else:
            licish = licensish

        models.set_ignorables(licish, result, verbose=verbose)
        licish.dump()
        if is_from_license:
            licensish = models.build_rule_from_license(licish)

    expected = models.get_normalized_ignorables(licensish)
    if verbose:
        print('expected')
        pprint(expected)

    try:
        assert result == expected
    # FIX: was a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; only the comparison failure should be caught here.
    except AssertionError:
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path.
        data_file = licensish.data_file
        if not data_file:
            data_file = licensish.text_file.replace('.LICENSE', '.yml')
        result['files'] = [
            f'file://{data_file}',
            f'file://{licensish.text_file}',
        ]
        # This assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(result) == saneyaml.dump(expected)
def dependency_mapper(dependencies, scope='dependencies'):
    """
    Yield DependentPackage collected from a ``dependencies`` mapping of
    {name: requirement} cargo dependencies for the provided ``scope``.
    """
    # dev and build dependencies are not needed at runtime
    is_runtime = not scope.endswith(('dev-dependencies', 'build-dependencies'))
    for name, requirement in dependencies.items():
        # FIX: default is_optional so an unexpected requirement type (neither
        # str nor dict) no longer raises NameError below
        is_optional = False

        if isinstance(requirement, str):
            # plain version requirement
            pass

        elif isinstance(requirement, dict):
            # complex requirement, with more than version are harder to handle
            # so we just dump.
            # FIX: work on a copy so popping "optional" does not mutate the
            # caller's mapping as a side effect.
            requirement = dict(requirement)
            is_optional = requirement.pop('optional', False)
            requirement = saneyaml.dump(requirement)

        yield models.DependentPackage(
            purl=PackageURL(
                type='cargo',
                name=name,
            ).to_string(),
            extracted_requirement=requirement,
            scope=scope,
            is_runtime=is_runtime,
            is_optional=is_optional,
            is_resolved=False,
        )
def generate_license_tests(location):
    # map their keys to ours
    license_mapping = {spdx: l.key for spdx, l in get_spdx_symbols().items()}
    license_mapping.update(extra_license_keys)

    all_tests = list(collect_tests(location)) + list(collect_url_tests(location))
    for test in all_tests:
        loc = test.location
        print(f'Processing: {loc}')

        # write the test text file
        with open(loc, 'w') as txt:
            txt.write(test.text)

        # normalize their key to lowercase, map to ours, default to "unknown"
        lickey = test.license_key
        lickey = lickey.lower() if lickey else None
        lickey = license_mapping.get(lickey) or 'unknown'

        url = f'https://raw.githubusercontent.com/google/licensecheck/v0.3.1/testdata/{test.filename}'
        notes = (
            f'License test derived from a file of the BSD-licensed repository at:\n'
            + f'{url}\n'
            + f'originally expected to be detected as {test.license_key}\n'
            + f'with coverage of {test.coverage}\n'
            + (test.notes or '')
        )
        # write the companion .yml data file with expectation and provenance
        with open(loc + '.yml', 'w') as td:
            td.write(saneyaml.dump(dict(license_expressions=[lickey], notes=notes)))
def dump(self):
    """
    Dump a representation of self to its YAML data file
    """
    serialized = saneyaml.dump(self.to_dict())
    with io.open(self.data_file, 'w', encoding='utf-8') as out:
        out.write(serialized)
def check_cyclone_output(expected_file, result_file, regen=False):
    """
    Check that expected and result_file are equal.
    Ignore headers.
    If `regen` is True the expected_file is overwritten with `results_file`.
    """
    actual = load_and_clean_json(result_file)

    # regenerate the expectation file from the current results when requested
    if regen:
        with open(expected_file, 'w') as out:
            json.dump(actual, out, indent=2, separators=(',', ': '))

    expected = load_and_clean_json(expected_file)

    # NOTE we redump the JSON as a YAML string for easier display of
    # the failures comparison/diff
    if actual != expected:
        assert saneyaml.dump(actual) == saneyaml.dump(expected)
def check_result_equals_expected_json(result, expected_loc, regen=False):
    """
    Check equality between a result collection and the data in an expected_loc
    JSON file. Regen the expected file if regen is True.
    """
    if regen:
        # write the current result as the new expectation, creating the
        # parent directory if needed
        expected = result
        target_dir = os.path.dirname(expected_loc)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        with open(expected_loc, 'w') as out:
            json.dump(expected, out, indent=2, separators=(',', ': '))
    else:
        with open(expected_loc) as inp:
            expected = json.load(inp)

    if result != expected:
        # compare YAML dumps for a more readable failure diff
        assert saneyaml.dump(result) == saneyaml.dump(expected)
def license_details_view(request, key):
    """
    Display all the information available about the provided license `key`
    followed by the full license text.
    """
    from html import escape

    licenses = get_licenses()
    try:
        data = saneyaml.dump(licenses[key].to_dict())
        text = licenses[key].text
    except KeyError:
        # FIX: escape the URL-supplied key to prevent reflected XSS in the
        # error response
        return HttpResponse(f"License {escape(key)} not found.")
    # FIX: escape the content so markup-like characters in license data/text
    # render literally inside <pre> instead of being interpreted as HTML
    return HttpResponse(f"<pre>{escape(data)}</pre><hr><pre>{escape(text)}</pre>")
def check_ignorable_clues(licensish, regen=False, verbose=False):
    """
    Validate that all expected ignorable clues declared in a `licensish`
    License or Rule object are properly detected in that rule text file.
    Optionally regen the ignorables and updates the License or Rule .yml
    data file.
    """
    result = models.get_ignorables(text_file=licensish.text_file)
    if verbose:
        print()
        print('result')
        pprint(result)

    if regen:
        models.set_ignorables(licensish, result, verbose=verbose)
        licensish.dump()

    expected = models.get_normalized_ignorables(licensish)
    if verbose:
        print('expected')
        pprint(expected)

    try:
        assert result == expected
    # FIX: was a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; only the comparison failure should be caught here.
    except AssertionError:
        # On failure, we compare again to get additional failure details such
        # as a clickable text_file path.
        data_file = licensish.data_file
        if not data_file:
            data_file = licensish.text_file.replace('.LICENSE', '.yml')
        result['files'] = [
            f'file://{data_file}',
            f'file://{licensish.text_file}',
        ]
        # This assert will always fail and provide a more detailed failure trace
        assert saneyaml.dump(result) == saneyaml.dump(expected)
def generate_details(output_path):
    # render one HTML page plus YAML/JSON/text companions per license
    template = env.get_template("license_details.html")
    for license in licenses.values():
        data = license.to_dict()
        rendered = template.render(
            **base_context,
            license=license,
            license_data=data,
        )
        key = license.key
        write_file(output_path, f"{key}.html", rendered)
        write_file(output_path, f"{key}.yml", saneyaml.dump(data))
        write_file(output_path, f"{key}.json", json.dumps(data))
        write_file(output_path, f"{key}.LICENSE", license.text)
def check_yaml_scan(expected_file, result_file, regen=False):
    """
    Check the scan `result_file` YAML results against the `expected_file`
    expected YAML results.

    If `regen` is True the expected_file WILL BE overwritten with the new scan
    results from `results_file`. This is convenient for updating tests
    expectations. But use with caution.
    """
    actual = load_yaml_results(result_file) or {}

    # regenerate the expectation file from the current results when requested
    if regen:
        with open(expected_file, 'w') as out:
            out.write(saneyaml.dump(actual))

    expected = load_yaml_results(expected_file)

    # headers carry volatile data (dates, tool versions) and are not compared
    actual.pop('headers', None)
    expected.pop('headers', None)

    # NOTE we redump the YAML as a string for a more efficient display of the
    # failures comparison/diff
    assert saneyaml.dump(expected) == saneyaml.dump(actual)
def pretty(data):
    """
    Return a unicode text pretty representation of data (as YAML or else) if
    data is a sequence or mapping or the data as-is otherwise
    """
    # empty or falsy values have no pretty representation
    if not data:
        return None

    seqtypes = list, tuple
    maptypes = OrderedDict, dict
    coltypes = seqtypes + maptypes

    # a one-string sequence pretty-prints as that bare, stripped string
    if isinstance(data, seqtypes) and len(data) == 1 and isinstance(data[0], string_types):
        return data[0].strip()

    if isinstance(data, coltypes):
        dumped = saneyaml.dump(data, indent=2, encoding='utf-8')
        return dumped.decode('utf-8').strip()

    return data
def check_expected_parse_copyright_file(
    test_loc,
    expected_loc,
    regen=False,
    with_details=False,
):
    """
    Check copyright parsing of `test_loc` location against an expected JSON
    file at `expected_loc` location. Regen the expected file if `regen` is
    True.
    """
    # detailed mode turns on all three normalization options at once
    skip_debian_packaging = simplify_licenses = unique = bool(with_details)

    parsed = debian_copyright.parse_copyright_file(
        copyright_file=test_loc,
        skip_debian_packaging=skip_debian_packaging,
        simplify_licenses=simplify_licenses,
        unique=unique,
    )
    result = saneyaml.dump(list(parsed))

    if regen:
        with io.open(expected_loc, 'w', encoding='utf-8') as out:
            out.write(result)

    with io.open(expected_loc, encoding='utf-8') as inp:
        expected = inp.read()

    if result != expected:
        # prefix clickable file:// paths for easier failure debugging
        expected = '\n'.join([
            'file://' + test_loc,
            'file://' + expected_loc,
            expected,
        ])
    assert result == expected
def closure_test_function(self):
    # round-trip the YAML fixture: load it, then dump it back to text
    # (test_file, regen and the expected_* paths come from the enclosing
    # closure that builds this test function)
    with io.open(test_file, encoding='utf-8') as inp:
        loaded = saneyaml.load(inp.read())
    dumped = saneyaml.dump(loaded)

    if regen:
        with io.open(expected_load_file, 'w', encoding='utf-8') as out:
            json.dump(loaded, out, indent=2)
        with io.open(expected_dump_file, 'w', encoding='utf-8') as out:
            out.write(dumped)

    with io.open(expected_load_file, encoding='utf-8') as inp:
        expected_load = json.load(inp)
    with io.open(expected_dump_file, encoding='utf-8') as inp:
        expected_dump = inp.read()

    assert expected_load == loaded
    assert expected_dump == dumped
def check_expected(test_loc, expected_loc, regen=False):
    """
    Check copyright parsing of `test_loc` location against an expected JSON
    file at `expected_loc` location. Regen the expected file if `regen` is
    True.
    """
    parsed = list(debian_copyright.parse_copyright_file(test_loc))
    result = saneyaml.dump(parsed)

    if regen:
        with io.open(expected_loc, 'w', encoding='utf-8') as out:
            out.write(result)

    with io.open(expected_loc, encoding='utf-8') as inp:
        expected = inp.read()

    if expected != result:
        # prefix clickable file:// paths for easier failure debugging
        expected = '\n'.join(
            ['file://' + test_loc, 'file://' + expected_loc, expected])
    assert expected == result
def generate_indexes(output_path):
    # render the HTML license list page
    template = env.get_template("license_list.html")
    write_file(
        output_path,
        "index.html",
        template.render(**base_context, licenses=licenses),
    )

    # build a machine-readable index pointing at the per-license files
    index = []
    for key, license in licenses.items():
        index.append({
            "license_key": key,
            "spdx_license_key": license.spdx_license_key,
            "other_spdx_license_keys": license.other_spdx_license_keys,
            "is_exception": license.is_exception,
            "is_deprecated": license.is_deprecated,
            "json": f"{key}.json",
            "yml": f"{key}.yml",
            "html": f"{key}.html",
            "text": f"{key}.LICENSE",
        })

    write_file(output_path, "index.json", json.dumps(index))
    write_file(output_path, "index.yml", saneyaml.dump(index))
def get_context_data(self, **kwargs):
    """
    Build the project detail view context: input files with their sources,
    summary chart data for files and packages, and the add-pipeline and
    add-inputs forms.
    """
    context = super().get_context_data(**kwargs)
    project = self.object

    resources = project.codebaseresources.files()
    file_filter = self.request.GET.get("file-filter", "all")
    if file_filter == "in-a-package":
        resources = resources.in_package()
    elif file_filter == "not-in-a-package":
        resources = resources.not_in_package()

    # restrict fetched columns to what the summaries below actually read
    resources = resources.only(
        "programming_language",
        "mime_type",
        "holders",
        "copyrights",
        "license_expressions",
    )
    packages = project.discoveredpackages.all().only(
        "type",
        "license_expression",
    )

    languages = resources.values_list("programming_language", flat=True)
    mime_types = resources.values_list("mime_type", flat=True)
    holders = self.data_from_model_field(resources, "holders", "value")
    copyrights = self.data_from_model_field(resources, "copyrights", "value")
    license_keys = self.data_from_model_field(resources, "licenses", "key")
    license_categories = self.data_from_model_field(
        resources, "licenses", "category"
    )

    # compliance data is only collected when license policies are enabled
    compliance_alerts = []
    if scanpipe_app.policies_enabled:
        compliance_alerts = resources.values_list("compliance_alert", flat=True)

    package_licenses = packages.values_list("license_expression", flat=True)
    package_types = packages.values_list("type", flat=True)

    inputs, missing_inputs = project.inputs_with_source
    if missing_inputs:
        missing_message = (
            "The following input files are not available on disk anymore:\n- "
            + "\n- ".join(missing_inputs.keys())
        )
        messages.error(self.request, missing_message)

    context.update(
        {
            "inputs_with_source": inputs,
            "programming_languages": self.get_summary(languages),
            "mime_types": self.get_summary(mime_types),
            "holders": self.get_summary(holders),
            "copyrights": self.get_summary(copyrights),
            "file_license_keys": self.get_summary(license_keys),
            "file_license_categories": self.get_summary(license_categories),
            "file_compliance_alert": self.get_summary(compliance_alerts),
            "package_licenses": self.get_summary(package_licenses),
            "package_types": self.get_summary(package_types),
            "file_filter": file_filter,
            "add_pipeline_form": AddPipelineForm(),
            "add_inputs_form": AddInputsForm(),
        }
    )

    if project.extra_data:
        context["extra_data_yaml"] = saneyaml.dump(project.extra_data, indent=2)

    return context
def dump(self):
    """
    Write self as YAML to its data file, creating the parent directory first
    if it does not exist.
    """
    parent = fileutils.parent_directory(self.data_file)
    if not exists(parent):
        fileutils.create_dir(parent)
    serialized = saneyaml.dump(self.to_dict())
    with open(self.data_file, 'w') as out:
        out.write(serialized)
def dumps(self):
    """
    Return a string representation of self in YAML block format.
    """
    serialized = saneyaml.dump(self.to_dict())
    return serialized
def write_yaml(results, output_file, **kwargs):
    """
    Write `results` to the `output_file` opened file-like object.
    """
    dumped = saneyaml.dump(results, indent=4)
    output_file.write(dumped)
    # keep a trailing newline at the end of the output
    output_file.write('\n')
def check_expected_parse_copyright_file(
    test_loc,
    expected_loc,
    regen=False,
    simplified=False,
    _licensing=Licensing(),
):
    '''
    Check copyright parsing of `test_loc` location against an expected JSON
    file at `expected_loc` location. Regen the expected file if `regen` is
    True.

    When `simplified` is True, parse with de-duplication and license
    simplification enabled. `_licensing` is a private, shared Licensing
    instance used to extract license keys from a license expression.
    '''
    # simplified mode turns on every normalization option at once
    if simplified:
        filter_duplicates = True
        skip_debian_packaging = True
        simplify_licenses = True
        unique_copyrights = True
    else:
        filter_duplicates = False
        skip_debian_packaging = False
        simplify_licenses = False
        unique_copyrights = False

    try:
        dc = debian_copyright.parse_copyright_file(
            location=test_loc,
            check_consistency=False,
        )
        declared_license = dc.get_declared_license(
            filter_duplicates=filter_duplicates,
            skip_debian_packaging=skip_debian_packaging,
        )
        license_expression = dc.get_license_expression(
            skip_debian_packaging=skip_debian_packaging,
            simplify_licenses=simplify_licenses,
        )
        # keys are used below to flag any unknown-license-reference detection
        license_expression_keys = set(_licensing.license_keys(license_expression))

        copyrght = dc.get_copyright(
            skip_debian_packaging=skip_debian_packaging,
            unique_copyrights=unique_copyrights,
        ).strip()

        primary_license = dc.primary_license
        match_details = list(map(get_match_details, dc.license_matches))

        results = {
            'primary_license': primary_license,
            'declared_license': declared_license,
            'license_expression': license_expression,
            'copyright': copyrght,
            'matches': match_details,
        }

        if regen:
            # write the current results as the new expectation
            expected = results
            with open(expected_loc, 'w') as res:
                res.write(saneyaml.dump(results))
        else:
            with open(expected_loc) as ex:
                expected = saneyaml.load(ex.read())

    except Exception as e:
        import traceback
        # wrap any parsing/loading failure with clickable file:// paths for
        # easier debugging
        files = [
            'file://' + test_loc,
            'file://' + expected_loc,
        ]
        raise Exception(repr(e), traceback.format_exc(), files) from e

    # fail when results differ from expected, or when an
    # unknown-license-reference key was detected at all
    if (
        not regen
        and (saneyaml.dump(results) != saneyaml.dump(expected)
        or 'unknown-license-reference' in license_expression_keys)
    ):
        res = {
            'test_loc': f'file://{test_loc}',
            'expected_loc': f'file://{expected_loc}',
        }
        res.update(results)
        results = saneyaml.dump(res)
        # rewrite the offending key so the failure diff names the problem
        results = results.replace(
            'unknown-license-reference',
            'unknown-license-reference should not be detected',
        )
        # this assert will always fail here and show a detailed diff
        assert results == saneyaml.dump(expected)
def test_dump_does_handles_numbers_and_booleans_correctly(self):
    # None dumps as an empty value; an int key is rendered bare while a
    # float key is rendered as a quoted string
    # NOTE(review): the expected literal's internal spacing looks suspect —
    # confirm against the actual saneyaml.dump output
    test = [None, dict([(1, None), (123.34, 'tha')])]
    expected = ("-\n" "- 1:\n" " '123.34': tha\n")
    assert expected == saneyaml.dump(test)
def test_dump_increases_indents_correctly(self):
    # each nesting level of the list adds one extra indent step
    # NOTE(review): the expected literal's internal spacing looks suspect —
    # confirm against the actual saneyaml.dump output
    test = {'a': [1, [2, 3, [4, 5]]]}
    expected = 'a:\n - 1\n - - 2\n - 3\n - - 4\n - 5\n'
    assert expected == saneyaml.dump(test)
def get_context_data(self, **kwargs):
    """
    Build the project detail view context: input file listing, summary chart
    data for files and packages, and the add-pipeline form.
    """
    context = super().get_context_data(**kwargs)
    project = self.object

    # list the project inputs with a flag telling files from directories
    input_path = project.input_path
    context["inputs"] = [
        (path.relative_to(input_path), path.is_file())
        for path in input_path.glob("*")
    ]

    resources = project.codebaseresources.files()
    file_filter = self.request.GET.get("file-filter", "all")
    if file_filter == "in-a-package":
        resources = resources.in_package()
    elif file_filter == "not-in-a-package":
        resources = resources.not_in_package()

    # restrict fetched columns to what the summaries below actually read
    resources = resources.only(
        "programming_language",
        "mime_type",
        "holders",
        "copyrights",
        "license_expressions",
    )
    packages = project.discoveredpackages.all().only(
        "type",
        "license_expression",
    )

    languages = resources.values_list("programming_language", flat=True)
    mime_types = resources.values_list("mime_type", flat=True)
    holders = self.data_from_model_field(resources, "holders", "value")
    copyrights = self.data_from_model_field(resources, "copyrights", "value")
    license_keys = self.data_from_model_field(resources, "licenses", "key")
    license_categories = self.data_from_model_field(
        resources, "licenses", "category"
    )
    package_licenses = packages.values_list("license_expression", flat=True)
    package_types = packages.values_list("type", flat=True)

    context.update(
        {
            "programming_languages": self.get_summary(languages),
            "mime_types": self.get_summary(mime_types),
            "holders": self.get_summary(holders),
            "copyrights": self.get_summary(copyrights),
            "file_license_keys": self.get_summary(license_keys),
            "file_license_categories": self.get_summary(license_categories),
            "package_licenses": self.get_summary(package_licenses),
            "package_types": self.get_summary(package_types),
            "file_filter": file_filter,
            "add_pipeline_form": AddPipelineForm(),
        }
    )

    if project.extra_data:
        context["extra_data_yaml"] = saneyaml.dump(project.extra_data, indent=2)

    return context
def test_dump_converts_bytes_to_unicode_correctly(self):
    # byte strings are decoded to text in both mapping keys and values
    test = {b'a': b'foo'}
    expected = 'a: foo\n'
    assert expected == saneyaml.dump(test)