def test_resource_paths_with_glob_file(self):
    # Walk the extracted 'ignore/user.tgz' fixture with a single ignore
    # plugin configured with the '*.doc' glob and check the relative paths
    # that are still reported.
    test_dir = self.extract_test_tar('ignore/user.tgz')
    test_plugin = ProcessIgnore(('*.doc',))
    scan_cache_class = get_scans_cache_class(self.get_temp_dir())

    # Expected listing, already in sorted order: no '.doc' entry remains.
    expected = [
        'user',
        'user/src',
        'user/src/test',
        'user/src/test/sample.txt',
    ]

    resources = resource_paths(test_dir, False, scan_cache_class, [test_plugin])
    found = sorted(res.rel_path for res in resources)
    assert expected == found
def test_resource_paths_with_glob_path(self):
    # Walk the extracted 'ignore/user.tgz' fixture with a single ignore
    # plugin configured with the '*/src/test' path glob and check the
    # relative paths that are still reported.
    test_dir = self.extract_test_tar('ignore/user.tgz')
    test_plugin = ProcessIgnore(('*/src/test',))
    scan_cache_class = get_scans_cache_class(self.get_temp_dir())

    # Expected listing, already in sorted order: nothing under
    # 'user/src/test' remains, while the '.doc' files are kept.
    expected = [
        'user',
        'user/ignore.doc',
        'user/src',
        'user/src/ignore.doc',
    ]

    resources = resource_paths(test_dir, False, scan_cache_class, [test_plugin])
    found = sorted(res.rel_path for res in resources)
    assert expected == found
def test_resource_paths_with_multiple_plugins(self):
    # Walk the extracted 'ignore/user.tgz' fixture with two ignore plugins
    # at once ('*.doc' and '*/src/test/*') and check the relative paths
    # that are still reported.
    test_dir = self.extract_test_tar('ignore/user.tgz')
    scan_cache_class = get_scans_cache_class(self.get_temp_dir())

    doc_ignore = ProcessIgnore(('*.doc',))
    test_content_ignore = ProcessIgnore(('*/src/test/*',))
    test_plugins = [doc_ignore, test_content_ignore]

    # Expected listing, already in sorted order: only directories remain.
    expected = ['user', 'user/src', 'user/src/test']

    found = []
    for resource in resource_paths(test_dir, False, scan_cache_class, test_plugins):
        found.append(resource.rel_path)
    found.sort()
    assert expected == found
def test_resource_paths_with_multiple_plugins(self):
    # Combine two ignore plugins ('*.doc' and '*/src/test/*') over the
    # extracted 'ignore/user.tgz' fixture and verify which relative paths
    # are still yielded by resource_paths().
    test_dir = self.extract_test_tar('ignore/user.tgz')
    scan_cache_class = get_scans_cache_class(self.get_temp_dir())

    patterns = ('*.doc', '*/src/test/*')
    # One ProcessIgnore per pattern, each receiving a one-element tuple.
    test_plugins = [ProcessIgnore((pattern,)) for pattern in patterns]

    # Expected listing, already in sorted order.
    expected = [
        'user',
        'user/src',
        'user/src/test',
    ]

    resources = resource_paths(test_dir, False, scan_cache_class, test_plugins)
    found = sorted(res.rel_path for res in resources)
    assert expected == found
def scancode(ctx, input, output_file, copyright, license, package,
             email, url, info, license_score, license_text, only_findings,
             strip_root, full_root, format, ignore, verbose, quiet, processes,
             diag, timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): this looks like a CLI command entry point (it receives a
    # `ctx` and finishes with `ctx.exit`); if so the docstring above is
    # presumably shown as help text, so it is deliberately left untouched.
    #
    # Flow: validate mutually exclusive options, work out which scans are
    # enabled, run `scan`, save the results, always clear the scan cache,
    # then exit with 0 on success and 1 otherwise.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> enabled flag. The order matters: it must match the order
    # of `scan_functions` below because the two are zipped together.
    possible_scans = OrderedDict([
        ('infos', info),
        ('licenses', license),
        ('copyrights', copyright),
        ('packages', package),
        ('emails', email),
        ('urls', url),
    ])

    # Command line options as received; handed to `save_results` further down.
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--only-findings', only_findings),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--ignore', ignore),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        possible_scans['copyrights'] = True
        possible_scans['licenses'] = True
        possible_scans['packages'] = True
        options['--copyright'] = True
        options['--license'] = True
        options['--package'] = True

    # A hack to force info being exposed for SPDX output in order to reuse
    # calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Keep only the options that are set: drop every entry whose value
    # compares equal to False, except '--license-score' which is always kept.
    # Iterate over a snapshot of the items: deleting from an OrderedDict while
    # iterating over it raises RuntimeError on Python 3.
    for key, value in list(options.items()):
        # `== False` (not `is False`) is kept on purpose to preserve the
        # original matching of values such as 0.
        if key != '--license-score' and value == False:  # noqa: E712
            del options[key]

    get_licenses_with_score = partial(
        get_licenses,
        min_score=license_score,
        include_text=license_text,
        diag=diag,
    )

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls,
    ]

    # FIXME: it does not make sense to use tuples and positional values here.
    # Maps each scan name to an (enabled flag, scan function) pair.
    scanners = OrderedDict(
        zip(possible_scans.keys(), zip(possible_scans.values(), scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Every user-supplied ignore pattern is mapped to the same description.
    user_ignore = {
        patt: 'User ignore: Supplied by --ignore' for patt in ignore
    }

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            ignore=user_ignore)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        save_results(scanners, only_findings, files_count, results, format,
                     options, input, output_file)
    finally:
        # cleanup: runs whether or not scanning or saving raised.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when the try block completed, so `success` is bound here.
    rc = 0 if success else 1
    ctx.exit(rc)
def scancode(ctx, input, output_file, copyright, license, package,
             email, url, info, license_score, license_text,
             license_url_template, strip_root, full_root, format, verbose,
             quiet, processes, diag, timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): this looks like a CLI command entry point (it receives a
    # `ctx` and finishes with `ctx.exit`); if so the docstring above is
    # presumably shown as help text, so it is deliberately left untouched.
    #
    # Flow: validate mutually exclusive options, work out which scans are
    # enabled, instantiate the requested pre-scan plugins, run `scan`, apply
    # the requested post-scan plugins, save the results, always clear the scan
    # cache, then exit with 0 on success and 1 otherwise.
    # Plugin option values are read from `kwargs`, keyed by the plugin name
    # with '-' replaced by '_'.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> enabled flag. The order matters: it must match the order
    # of `scan_functions` below because the two are zipped together.
    possible_scans = OrderedDict([
        ('infos', info),
        ('licenses', license),
        ('copyrights', copyright),
        ('packages', package),
        ('emails', email),
        ('urls', url),
    ])

    # Command line options as received; handed to `save_results` further down.
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        possible_scans['copyrights'] = True
        possible_scans['licenses'] = True
        possible_scans['packages'] = True
        options['--copyright'] = True
        options['--license'] = True
        options['--package'] = True

    # A hack to force info being exposed for SPDX output in order to reuse
    # calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Keep only the options that are set: drop every entry whose value
    # compares equal to False, except '--license-score' which is always kept.
    # Iterate over a snapshot of the items: deleting from an OrderedDict while
    # iterating over it raises RuntimeError on Python 3.
    for key, value in list(options.items()):
        # `== False` (not `is False`) is kept on purpose to preserve the
        # original matching of values such as 0.
        if key != '--license-score' and value == False:  # noqa: E712
            del options[key]

    get_licenses_with_score = partial(
        get_licenses,
        min_score=license_score,
        include_text=license_text,
        diag=diag,
        license_url_template=license_url_template,
    )

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls,
    ]

    # FIXME: it does not make sense to use tuples and positional values here.
    # Maps each scan name to an (enabled flag, scan function) pair.
    scanners = OrderedDict(
        zip(possible_scans.keys(), zip(possible_scans.values(), scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Instantiate each pre-scan plugin for which the user supplied a value,
    # and record that value among the reported options.
    pre_scan_plugins = []
    for name, plugin in plugincode.pre_scan.get_pre_scan_plugins().items():
        user_input = kwargs[name.replace('-', '_')]
        if user_input:
            options['--' + name] = user_input
            pre_scan_plugins.append(plugin(user_input))

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            pre_scan_plugins=pre_scan_plugins)

        # Find all scans that are both enabled and have a valid function
        # reference. This deliberately filters out the "info" scan
        # (which always has a "None" function reference) as there is no
        # dedicated "infos" key in the results that
        # "plugin_only_findings.has_findings()" could check.
        # FIXME: we should not use positional tuples: v[0] is the enabled
        # flag and v[1] is the scan function.
        active_scans = [k for k, v in scanners.items() if v[0] and v[1]]

        has_requested_post_scan_plugins = False
        post_scan_plugins = plugincode.post_scan.get_post_scan_plugins()
        for option, post_scan_handler in post_scan_plugins.items():
            is_requested = kwargs[option.replace('-', '_')]
            if not is_requested:
                continue
            options['--' + option] = True
            if not quiet:
                # Explicit mapping instead of locals(): same message, without
                # exposing every local name to the format string.
                echo_stderr(
                    'Running post-scan plugin: %(option)s...' % {'option': option},
                    fg='green')
            # Each handler receives the previous handler's results.
            results = post_scan_handler(active_scans, results)
            has_requested_post_scan_plugins = True

        if has_requested_post_scan_plugins:
            # FIXME: computing len needs a list and therefore needs loading
            # it all ahead of time
            results = list(results)
            files_count = len(results)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        # FIXME: we should have simpler args: a scan "header" and scan results
        save_results(scanners, files_count, results, format, options, input,
                     output_file)
    finally:
        # cleanup: runs whether or not scanning or saving raised.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when the try block completed, so `success` is bound here.
    rc = 0 if success else 1
    ctx.exit(rc)
def scancode(ctx, input, output_file, copyright, license, package,
             email, url, info, license_score, license_text,
             license_url_template, strip_root, full_root, format, verbose,
             quiet, processes, diag, timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): this looks like a CLI command entry point (it receives a
    # `ctx` and finishes with `ctx.exit`); if so the docstring above is
    # presumably shown as help text, so it is deliberately left untouched.
    #
    # Flow: validate mutually exclusive options, work out which scans are
    # enabled, instantiate the requested pre-scan plugins, run `scan`, apply
    # the requested post-scan plugins, save the results, always clear the scan
    # cache, then exit with 0 on success and 1 otherwise.
    # Plugin option values are read from `kwargs`, keyed by the plugin name
    # with '-' replaced by '_'.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> enabled flag. The order matters: it must match the order
    # of `scan_functions` below because the two are zipped together.
    possible_scans = OrderedDict([
        ('infos', info),
        ('licenses', license),
        ('copyrights', copyright),
        ('packages', package),
        ('emails', email),
        ('urls', url),
    ])

    # Command line options as received; handed to `save_results` further down.
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        for scan_name in ('copyrights', 'licenses', 'packages'):
            possible_scans[scan_name] = True
        for option_name in ('--copyright', '--license', '--package'):
            options[option_name] = True

    # A hack to force info being exposed for SPDX output in order to reuse
    # calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Keep only the options that are set: drop every entry whose value
    # compares equal to False, except '--license-score' which is always kept.
    # The keys to remove are collected first and deleted afterwards, because
    # deleting from an OrderedDict while iterating over it raises RuntimeError
    # on Python 3.
    # `== False` (not `is False`) is kept on purpose to preserve the original
    # matching of values such as 0.
    unset_options = [
        key for key, value in options.items()
        if key != '--license-score' and value == False  # noqa: E712
    ]
    for key in unset_options:
        del options[key]

    get_licenses_with_score = partial(
        get_licenses,
        min_score=license_score,
        include_text=license_text,
        diag=diag,
        license_url_template=license_url_template,
    )

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls,
    ]

    # FIXME: it does not make sense to use tuples and positional values here.
    # Maps each scan name to an (enabled flag, scan function) pair.
    scanners = OrderedDict(
        zip(possible_scans.keys(), zip(possible_scans.values(), scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Instantiate each pre-scan plugin for which the user supplied a value,
    # and record that value among the reported options.
    pre_scan_plugins = []
    for name, plugin in plugincode.pre_scan.get_pre_scan_plugins().items():
        user_input = kwargs[name.replace('-', '_')]
        if user_input:
            options['--' + name] = user_input
            pre_scan_plugins.append(plugin(user_input))

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            pre_scan_plugins=pre_scan_plugins)

        # Find all scans that are both enabled and have a valid function
        # reference. This deliberately filters out the "info" scan
        # (which always has a "None" function reference) as there is no
        # dedicated "infos" key in the results that
        # "plugin_only_findings.has_findings()" could check.
        # FIXME: we should not use positional tuples: v[0] is the enabled
        # flag and v[1] is the scan function.
        active_scans = [k for k, v in scanners.items() if v[0] and v[1]]

        has_requested_post_scan_plugins = False
        post_scan_plugins = plugincode.post_scan.get_post_scan_plugins()
        for option, post_scan_handler in post_scan_plugins.items():
            is_requested = kwargs[option.replace('-', '_')]
            if is_requested:
                options['--' + option] = True
                if not quiet:
                    # Explicit mapping instead of locals(): same message,
                    # without exposing every local name to the format string.
                    message = 'Running post-scan plugin: %(option)s...' % {
                        'option': option,
                    }
                    echo_stderr(message, fg='green')
                # Each handler receives the previous handler's results.
                results = post_scan_handler(active_scans, results)
                has_requested_post_scan_plugins = True

        if has_requested_post_scan_plugins:
            # FIXME: computing len needs a list and therefore needs loading
            # it all ahead of time
            results = list(results)
            files_count = len(results)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        # FIXME: we should have simpler args: a scan "header" and scan results
        save_results(scanners, files_count, results, format, options, input,
                     output_file)
    finally:
        # cleanup: runs whether or not scanning or saving raised.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when the try block completed, so `success` is bound here.
    rc = 0 if success else 1
    ctx.exit(rc)