예제 #1
0
 def test_resource_paths_with_glob_file(self):
     """Check resource_paths() output with a '*.doc' ProcessIgnore plugin.

     The 'ignore/user.tgz' archive is extracted, walked through
     resource_paths() with a single plugin built from the '*.doc' glob, and
     the sorted relative paths must equal the expected listing (which holds
     no '.doc' entry).
     """
     extracted_root = self.extract_test_tar('ignore/user.tgz')
     ignore_docs = ProcessIgnore(('*.doc', ))
     cache_class = get_scans_cache_class(self.get_temp_dir())

     expected = [
         'user',
         'user/src',
         'user/src/test',
         'user/src/test/sample.txt',
     ]

     # Collect the relative path of every resource reported by the walk.
     rel_paths = []
     for res in resource_paths(extracted_root, False, cache_class, [ignore_docs]):
         rel_paths.append(res.rel_path)

     assert expected == sorted(rel_paths)
 def test_resource_paths_with_glob_path(self):
     """Check resource_paths() output with a '*/src/test' ProcessIgnore plugin.

     The 'ignore/user.tgz' archive is extracted, walked through
     resource_paths() with a single plugin built from the '*/src/test' glob,
     and the sorted relative paths must equal the expected listing (which
     holds nothing at or below 'user/src/test').
     """
     extracted_root = self.extract_test_tar('ignore/user.tgz')
     ignore_test_dir = ProcessIgnore(('*/src/test',))
     cache_class = get_scans_cache_class(self.get_temp_dir())

     expected = ['user', 'user/ignore.doc', 'user/src', 'user/src/ignore.doc']

     # Sort the relative paths so the comparison does not depend on walk order.
     walked = resource_paths(
         extracted_root, False, cache_class, [ignore_test_dir])
     rel_paths = sorted(res.rel_path for res in walked)

     assert expected == rel_paths
예제 #3
0
 def test_resource_paths_with_multiple_plugins(self):
     """Check resource_paths() output when two ProcessIgnore plugins are used.

     One plugin is built from the '*.doc' glob and the other from
     '*/src/test/*'. After walking the extracted 'ignore/user.tgz' archive
     with both, only 'user', 'user/src' and 'user/src/test' are expected.
     """
     extracted_root = self.extract_test_tar('ignore/user.tgz')
     cache_class = get_scans_cache_class(self.get_temp_dir())

     ignore_docs = ProcessIgnore(('*.doc', ))
     ignore_test_content = ProcessIgnore(('*/src/test/*', ))
     plugins = [ignore_docs, ignore_test_content]

     expected = ['user', 'user/src', 'user/src/test']

     # Collect the relative path of every resource reported by the walk.
     rel_paths = []
     for res in resource_paths(extracted_root, False, cache_class, plugins):
         rel_paths.append(res.rel_path)

     assert expected == sorted(rel_paths)
 def test_resource_paths_with_multiple_plugins(self):
     """Check resource_paths() output with two ProcessIgnore plugins combined.

     The plugins are built from the '*.doc' and '*/src/test/*' globs. The
     sorted relative paths returned for the extracted 'ignore/user.tgz'
     archive must be exactly 'user', 'user/src' and 'user/src/test'.
     """
     extracted_root = self.extract_test_tar('ignore/user.tgz')
     cache_class = get_scans_cache_class(self.get_temp_dir())
     plugins = [ProcessIgnore(('*.doc',)), ProcessIgnore(('*/src/test/*',))]

     expected = ['user', 'user/src', 'user/src/test']

     # Sort the relative paths so the comparison does not depend on walk order.
     walked = resource_paths(extracted_root, False, cache_class, plugins)
     rel_paths = sorted(res.rel_path for res in walked)

     assert expected == rel_paths
예제 #5
0
def scancode(ctx, input, output_file, copyright, license, package, email, url,
             info, license_score, license_text, only_findings, strip_root,
             full_root, format, ignore, verbose, quiet, processes, diag,
             timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably rendered as the command line help text -- confirm before
    # rewording it.
    #
    # Overall flow:
    #   1. reject the combined use of strip_root and full_root,
    #   2. work out which scans are enabled and which options are in effect,
    #   3. run scan() and hand its results to save_results(),
    #   4. always clear the scans cache, then exit through ctx.exit() with
    #      0 when scan() reported success and 1 otherwise.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> "is this scan requested" flag. The order matters: it must
    # match the order of "scan_functions" below.
    possible_scans = OrderedDict([('infos', info), ('licenses', license),
                                  ('copyrights', copyright),
                                  ('packages', package), ('emails', email),
                                  ('urls', url)])

    # Command line option name -> value, later passed to save_results().
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--only-findings', only_findings),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--ignore', ignore),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        possible_scans['copyrights'] = True
        possible_scans['licenses'] = True
        possible_scans['packages'] = True
        options['--copyright'] = True
        options['--license'] = True
        options['--package'] = True

    # A hack to force info being exposed for SPDX output in order to reuse calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Drop every option whose value compares equal to False so that only the
    # options in effect are passed on to save_results(). "--license-score" is
    # always kept, whatever its value.
    # Iterate over a snapshot of the keys: deleting entries from an
    # OrderedDict while iterating over it directly can raise
    # "RuntimeError: OrderedDict mutated during iteration" on Python 3.
    for key in list(options):
        if key == "--license-score":
            continue
        # Equality (not identity) is used on purpose to keep the original
        # filtering semantics.
        if options[key] == False:  # noqa: E712
            del options[key]

    get_licenses_with_score = partial(get_licenses,
                                      min_score=license_score,
                                      include_text=license_text,
                                      diag=diag)

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls
    ]

    # Scan name -> (is requested, scan function).
    # FIXME: it does not make sense to use a tuple and positional values
    scanners = OrderedDict(
        zip(possible_scans.keys(), zip(possible_scans.values(),
                                       scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Map each pattern supplied with --ignore to a message describing where
    # the pattern comes from.
    user_ignore = {
        patt: 'User ignore: Supplied by --ignore'
        for patt in ignore
    }

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            ignore=user_ignore)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        save_results(scanners, only_findings, files_count, results, format,
                     options, input, output_file)

    finally:
        # Clean up the scans cache, whether or not the scan succeeded.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when no exception was raised above, so "success" is bound.
    rc = 0 if success else 1
    ctx.exit(rc)
예제 #6
0
def scancode(ctx,
             input, output_file,
             copyright, license, package,
             email, url, info,
             license_score, license_text, license_url_template,
             strip_root, full_root,
             format, verbose, quiet, processes,
             diag, timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably rendered as the command line help text -- confirm before
    # rewording it.
    #
    # Overall flow:
    #   1. reject the combined use of strip_root and full_root,
    #   2. work out which scans are enabled and which options are in effect,
    #   3. instantiate the pre-scan plugins that received a value in kwargs,
    #   4. run scan(), then every requested post-scan plugin on its results,
    #   5. hand the results to save_results(),
    #   6. always clear the scans cache, then exit through ctx.exit() with
    #      0 when scan() reported success and 1 otherwise.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> "is this scan requested" flag. The order matters: it must
    # match the order of "scan_functions" below.
    possible_scans = OrderedDict([
        ('infos', info),
        ('licenses', license),
        ('copyrights', copyright),
        ('packages', package),
        ('emails', email),
        ('urls', url)
    ])

    # Command line option name -> value, later passed to save_results().
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        possible_scans['copyrights'] = True
        possible_scans['licenses'] = True
        possible_scans['packages'] = True
        options['--copyright'] = True
        options['--license'] = True
        options['--package'] = True

    # A hack to force info being exposed for SPDX output in order to reuse calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Drop every option whose value compares equal to False so that only the
    # options in effect are passed on to save_results(). "--license-score" is
    # always kept, whatever its value.
    # Iterate over a snapshot of the keys: deleting entries from an
    # OrderedDict while iterating over it directly can raise
    # "RuntimeError: OrderedDict mutated during iteration" on Python 3.
    for key in list(options):
        if key == "--license-score":
            continue
        # Equality (not identity) is used on purpose to keep the original
        # filtering semantics.
        if options[key] == False:  # noqa: E712
            del options[key]

    get_licenses_with_score = partial(
        get_licenses,
        min_score=license_score,
        include_text=license_text,
        diag=diag,
        license_url_template=license_url_template)

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls
    ]

    # Scan name -> (is requested, scan function).
    # FIXME: it does not make sense to use a tuple and positional values
    scanners = OrderedDict(zip(possible_scans.keys(), zip(possible_scans.values(), scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Instantiate each pre-scan plugin for which a truthy value is found in
    # kwargs, under the plugin name with dashes replaced by underscores, and
    # record that value in the reported options.
    pre_scan_plugins = []
    for name, plugin in plugincode.pre_scan.get_pre_scan_plugins().items():
        user_input = kwargs[name.replace('-', '_')]
        if user_input:
            options['--' + name] = user_input
            pre_scan_plugins.append(plugin(user_input))

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            pre_scan_plugins=pre_scan_plugins)

        # Find all scans that are both enabled and have a valid function
        # reference. This deliberately filters out the "info" scan
        # (which always has a "None" function reference) as there is no
        # dedicated "infos" key in the results that "plugin_only_findings.has_findings()"
        # could check.
        # FIXME: we should not use positional tuples: v[0] is the "is
        # requested" flag and v[1] is the scan function
        active_scans = [k for k, v in scanners.items() if v[0] and v[1]]

        has_requested_post_scan_plugins = False

        # Chain the requested post-scan plugins: each one receives the results
        # produced by the previous one.
        for option, post_scan_handler in plugincode.post_scan.get_post_scan_plugins().items():
            is_requested = kwargs[option.replace('-', '_')]
            if is_requested:
                options['--' + option] = True
                if not quiet:
                    # The message is formatted from the local name "option".
                    echo_stderr('Running post-scan plugin: %(option)s...' % locals(), fg='green')
                results = post_scan_handler(active_scans, results)
                has_requested_post_scan_plugins = True

        if has_requested_post_scan_plugins:
            # The results are counted again once post-scan plugins have run.
            # FIXME: computing len needs a list and therefore needs loading it all ahead of time
            results = list(results)
            files_count = len(results)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        # FIXME: we should have simpler args: a scan "header" and scan results
        save_results(scanners, files_count, results, format, options, input, output_file)

    finally:
        # Clean up the scans cache, whether or not the scan succeeded.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when no exception was raised above, so "success" is bound.
    rc = 0 if success else 1
    ctx.exit(rc)
예제 #7
0
def scancode(ctx, input, output_file, copyright, license, package, email, url,
             info, license_score, license_text, license_url_template,
             strip_root, full_root, format, verbose, quiet, processes, diag,
             timeout, *args, **kwargs):
    """scan the <input> file or directory for origin clues and license and save results to the <output_file>.

    The scan results are printed to stdout if <output_file> is not provided.
    Error and progress is printed to stderr.
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably rendered as the command line help text -- confirm before
    # rewording it.
    #
    # Overall flow:
    #   1. reject the combined use of strip_root and full_root,
    #   2. work out which scans are enabled and which options are in effect,
    #   3. instantiate the pre-scan plugins that received a value in kwargs,
    #   4. run scan(), then every requested post-scan plugin on its results,
    #   5. hand the results to save_results(),
    #   6. always clear the scans cache, then exit through ctx.exit() with
    #      0 when scan() reported success and 1 otherwise.

    validate_exclusive(ctx, ['strip_root', 'full_root'])

    # Scan name -> "is this scan requested" flag. The order matters: it must
    # match the order of "scan_functions" below.
    possible_scans = OrderedDict([('infos', info), ('licenses', license),
                                  ('copyrights', copyright),
                                  ('packages', package), ('emails', email),
                                  ('urls', url)])

    # Command line option name -> value, later passed to save_results().
    options = OrderedDict([
        ('--copyright', copyright),
        ('--license', license),
        ('--package', package),
        ('--email', email),
        ('--url', url),
        ('--info', info),
        ('--license-score', license_score),
        ('--license-text', license_text),
        ('--strip-root', strip_root),
        ('--full-root', full_root),
        ('--format', format),
        ('--diag', diag),
    ])

    # Use default scan options when no options are provided on the command line.
    if not any(possible_scans.values()):
        possible_scans['copyrights'] = True
        possible_scans['licenses'] = True
        possible_scans['packages'] = True
        options['--copyright'] = True
        options['--license'] = True
        options['--package'] = True

    # A hack to force info being exposed for SPDX output in order to reuse calculated file SHA1s.
    if format in ('spdx-tv', 'spdx-rdf'):
        possible_scans['infos'] = True

    # Drop every option whose value compares equal to False so that only the
    # options in effect are passed on to save_results(). "--license-score" is
    # always kept, whatever its value.
    # Iterate over a snapshot of the keys: deleting entries from an
    # OrderedDict while iterating over it directly can raise
    # "RuntimeError: OrderedDict mutated during iteration" on Python 3.
    for key in list(options):
        if key == "--license-score":
            continue
        # Equality (not identity) is used on purpose to keep the original
        # filtering semantics.
        if options[key] == False:  # noqa: E712
            del options[key]

    get_licenses_with_score = partial(
        get_licenses,
        min_score=license_score,
        include_text=license_text,
        diag=diag,
        license_url_template=license_url_template)

    # List of scan functions in the same order as "possible_scans".
    scan_functions = [
        None,  # For "infos" there is no separate scan function, they are always gathered, though not always exposed.
        get_licenses_with_score,
        get_copyrights,
        get_package_infos,
        get_emails,
        get_urls
    ]

    # Scan name -> (is requested, scan function).
    # FIXME: it does not make sense to use a tuple and positional values
    scanners = OrderedDict(
        zip(possible_scans.keys(), zip(possible_scans.values(),
                                       scan_functions)))

    scans_cache_class = get_scans_cache_class()

    # Instantiate each pre-scan plugin for which a truthy value is found in
    # kwargs, under the plugin name with dashes replaced by underscores, and
    # record that value in the reported options.
    pre_scan_plugins = []
    for name, plugin in plugincode.pre_scan.get_pre_scan_plugins().items():
        user_input = kwargs[name.replace('-', '_')]
        if user_input:
            options['--' + name] = user_input
            pre_scan_plugins.append(plugin(user_input))

    try:
        files_count, results, success = scan(
            input_path=input,
            scanners=scanners,
            verbose=verbose,
            quiet=quiet,
            processes=processes,
            timeout=timeout,
            diag=diag,
            scans_cache_class=scans_cache_class,
            strip_root=strip_root,
            full_root=full_root,
            pre_scan_plugins=pre_scan_plugins)

        # Find all scans that are both enabled and have a valid function
        # reference. This deliberately filters out the "info" scan
        # (which always has a "None" function reference) as there is no
        # dedicated "infos" key in the results that "plugin_only_findings.has_findings()"
        # could check.
        # FIXME: we should not use positional tuples: v[0] is the "is
        # requested" flag and v[1] is the scan function
        active_scans = [k for k, v in scanners.items() if v[0] and v[1]]

        has_requested_post_scan_plugins = False

        # Chain the requested post-scan plugins: each one receives the results
        # produced by the previous one.
        for option, post_scan_handler in plugincode.post_scan.get_post_scan_plugins(
        ).items():
            is_requested = kwargs[option.replace('-', '_')]
            if is_requested:
                options['--' + option] = True
                if not quiet:
                    # The message is formatted from the local name "option".
                    echo_stderr('Running post-scan plugin: %(option)s...' %
                                locals(),
                                fg='green')
                results = post_scan_handler(active_scans, results)
                has_requested_post_scan_plugins = True

        if has_requested_post_scan_plugins:
            # The results are counted again once post-scan plugins have run.
            # FIXME: computing len needs a list and therefore needs loading it all ahead of time
            results = list(results)
            files_count = len(results)

        if not quiet:
            echo_stderr('Saving results.', fg='green')

        # FIXME: we should have simpler args: a scan "header" and scan results
        save_results(scanners, files_count, results, format, options, input,
                     output_file)

    finally:
        # Clean up the scans cache, whether or not the scan succeeded.
        cache = scans_cache_class()
        cache.clear()

    # Only reached when no exception was raised above, so "success" is bound.
    rc = 0 if success else 1
    ctx.exit(rc)