Example #1
0
def scan(input_path,
         scanners,
         verbose=False,
         quiet=False,
         processes=1,
         timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False,
         full_root=False,
         ignore=None):
    """
    Return a tuple of (files_count, scan_results, success) where
    scan_results is an iterable and success is a boolean.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr(
            'Scanning files for: %(_scans)s with %(processes)d process(es)...'
            % locals())

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: It does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop for speed
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...',
                        fg='green',
                        nl=False)
        from licensedcode.cache import get_index
        get_index(False)
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)

    scan_summary['indexing_time'] = indexing_time

    # TODO: handle pickling errors as in ./scancode -cilp   samples/ -n3: note they are only caused by a FanoutCache
    # TODO: handle other exceptions properly to avoid any hanging

    # maxtasksperchild helps with recycling processes in case of leaks
    pool = get_pool(processes=processes, maxtasksperchild=1000)
    ignore = ignore or {}
    resources = resource_paths(input_path, ignore)
    logfile_path = scans_cache_class().cache_files_log
    paths_with_error = []
    files_count = 0
    with codecs.open(logfile_path, 'w', encoding='utf-8') as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit,
                         scanners=scanners,
                         scans_cache_class=scans_cache_class,
                         diag=diag,
                         timeout=timeout)

        max_file_name_len = compute_fn_max_len()
        # do not display a file name in the progress bar if 10 or fewer characters are available.
        display_fn = bool(max_file_name_len > 10)
        try:
            # Using chunksize is documented as much more efficient in the Python doc.
            # Yet "1" still provides a better and more progressive feedback.
            # With imap_unordered, results are returned as soon as ready and out of order.
            scanned_files = pool.imap_unordered(scanit,
                                                logged_resources,
                                                chunksize=1)
            pool.close()

            if not quiet:
                echo_stderr('Scanning files...', fg='green')

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet or not item or not display_fn:
                    return ''
                _scan_success, _scanned_path = item
                if verbose:
                    _progress_line = _scanned_path
                else:
                    _progress_line = fixed_width_file_name(
                        _scanned_path, max_file_name_len)
                return style('Scanned: ') + style(
                    _progress_line, fg=_scan_success and 'green' or 'red')

            files_count = 0
            with progressmanager(scanned_files,
                                 item_show_func=scan_event,
                                 show_pos=True,
                                 verbose=verbose,
                                 quiet=quiet,
                                 file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            paths_with_error.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        print('\nAborted with Ctrl+C!')
                        pool.terminate()
                        break
        finally:
            # ensure the pool is really dead to work around a Python 2.7.3 bug:
            # http://bugs.python.org/issue15101
            pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = paths_with_error
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg=paths_with_error and 'red' or 'green')
        if paths_with_error:
            if diag:
                echo_stderr('Some files failed to scan properly:', fg='red')
                # iterate cached results to collect all scan errors
                cached_scan = scans_cache_class()
                root_dir = _get_root_dir(input_path, strip_root, full_root)
                scan_results = cached_scan.iterate(
                    scans, root_dir, paths_subset=paths_with_error)
                for scan_result in scan_results:
                    errored_path = scan_result.get('path', '')
                    echo_stderr('Path: ' + errored_path, fg='red')
                    for error in scan_result.get('scan_errors', []):
                        for emsg in error.splitlines(False):
                            echo_stderr('  ' + emsg)
                    echo_stderr('')
            else:
                echo_stderr(
                    'Some files failed to scan properly. Use the --diag option for additional details:',
                    fg='red')
                for errored_path in paths_with_error:
                    echo_stderr(' ' + errored_path, fg='red')

        echo_stderr(
            'Scan statistics: %(files_count)d files scanned in %(total_time)ds.'
            % locals())
        echo_stderr(
            'Scan options:    %(_scans)s with %(processes)d process(es).' %
            locals())
        echo_stderr(
            'Scanning speed:  %(files_scanned_per_second)s files per sec.' %
            locals())
        echo_stderr('Scanning time:   %(scanning_time)ds.' % locals())
        echo_stderr('Indexing time:   %(indexing_time)ds.' % locals(),
                    reset=True)

    success = not paths_with_error
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root, full_root)
    return files_count, cached_scan.iterate(scans, root_dir), success
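
# A minimal, self-contained sketch (not part of the scan code above) of the
# imap_unordered pattern it relies on: with chunksize=1, results stream back
# as soon as each task completes, out of order, which is what enables the
# per-file progress feedback. The worker and inputs here are hypothetical.
from multiprocessing import Pool

def _square(n):  # stand-in for the real per-file scan callable
    return n, n * n

if __name__ == '__main__':
    pool = Pool(processes=2, maxtasksperchild=1000)
    try:
        for n, sq in pool.imap_unordered(_square, range(10), chunksize=1):
            print('done: %d -> %d' % (n, sq))  # arrives in completion order
        pool.close()
    finally:
        # terminate() in a finally block mirrors the workaround for
        # Python issue 15101 used above
        pool.terminate()
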
Example #2
0
def scan(input_path, copyright=True, license=True, package=True,  # @ReservedAssignment
         email=False, url=False, info=True, verbose=False, quiet=False):  # @ReservedAssignment
    """
    Do the scans proper, return results.
    """
    # save paths to report paths relative to the original input
    original_input = fileutils.as_posixpath(input_path)
    abs_input = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input_path)))

    # note: "flag and function" expressions return the function if flag is True
    scanners = {
        'copyrights': copyright and get_copyrights,
        'licenses': license and get_licenses,
        'packages': package and get_package_infos,
        'emails': email and get_emails,
        'urls': url and get_urls,
        'infos': info and get_file_infos,
    }

    results = []

    # note: we inline progress display functions to close over some args

    def scan_start():
        """Progress event displayed at start of scan"""
        return style('Scanning files...', fg='green')

    def scan_event(item):
        """Progress event displayed each time a file is scanned"""
        if item:
            line = verbose and item or fileutils.file_name(item) or ''
            return 'Scanning: %(line)s' % locals()

    def scan_end():
        """Progress event displayed at end of scan"""
        # placeholder flags: warning/error tracking is not wired up here
        has_warnings = False
        has_errors = False
        summary = []
        summary_color = 'green'
        summary_color = has_warnings and 'yellow' or summary_color
        summary_color = has_errors and 'red' or summary_color
        summary.append(style('Scanning done.', fg=summary_color, reset=True))
        return '\n'.join(summary)

    ignored = partial(ignore.is_ignored, ignores=ignore.ignores_VCS, unignores={})
    resources = fileutils.resource_iter(abs_input, ignored=ignored)

    with utils.progressmanager(resources,
                               item_show_func=scan_event,
                               start_show_func=scan_start,
                               finish_show_func=scan_end,
                               verbose=verbose,
                               show_pos=True,
                               quiet=quiet
                               ) as progressive_resources:

        for resource in progressive_resources:
            res = fileutils.as_posixpath(resource)

            # fix paths: keep the location as relative to the original input
            relative_path = utils.get_relative_path(original_input, abs_input, res)
            scan_result = OrderedDict(location=relative_path)
            # Should we yield instead?
            scan_result.update(scan_one(res, scanners))
            results.append(scan_result)

    # TODO: eventually merge scans for the same resource location...
    # TODO: fix absolute paths as relative to original input argument...

    return results
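
# Sketch of the "flag and function" idiom used to build the scanners mapping
# above: when the flag is truthy the expression yields the function, otherwise
# False, so a single test selects the enabled scanners. Names are illustrative.
def get_copyrights(location):
    return ['(c) Example']

copyright_enabled = True
scanners = {'copyrights': copyright_enabled and get_copyrights}

for name, scanner in scanners.items():
    if scanner:  # False when the flag was off
        print('%s: %r' % (name, scanner('some/path')))
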
def extractcode(ctx, input, verbose, quiet, shallow, *args, **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """

    abs_location = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input)))

    def extract_event(item):
        """
        Display an extract event.
        """
        if quiet:
            return ''
        if not item:
            return ''
        source = item.source
        if not isinstance(source, unicode):
            source = toascii(source, translit=True).decode('utf-8', 'replace')
        if verbose:
            if item.done:
                return ''
            line = source and utils.get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir) or ''
        else:
            line = source and fileutils.file_name(source) or ''
        if not isinstance(line, unicode):
            line = toascii(line, translit=True).decode('utf-8', 'replace')
        return 'Extracting: %(line)s' % locals()

    def display_extract_summary():
        """
        Display a summary of warnings and errors if any.
        """
        has_warnings = False
        has_errors = False
        for xev in extract_results:
            has_errors = has_errors or bool(xev.errors)
            has_warnings = has_warnings or bool(xev.warnings)
            source = fileutils.as_posixpath(xev.source)
            if not isinstance(source, unicode):
                source = toascii(source, translit=True).decode('utf-8', 'replace')
            # relativize the displayed path in all cases, not only after a
            # transliteration round-trip
            source = utils.get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir)
            for e in xev.errors:
                echo_stderr('ERROR extracting: %(source)s: %(e)s' % locals(), fg='red')
            for warn in xev.warnings:
                echo_stderr('WARNING extracting: %(source)s: %(warn)s' % locals(), fg='yellow')

        summary_color = 'green'
        if has_warnings:
            summary_color = 'yellow'
        if has_errors:
            summary_color = 'red'

        echo_stderr('Extracting done.', fg=summary_color, reset=True)


    # use for relative paths computation
    len_base_path = len(abs_location)
    base_is_dir = filetype.is_dir(abs_location)

    extract_results = []
    has_extract_errors = False
    if not quiet:
        echo_stderr('Extracting archives...', fg='green')

    with utils.progressmanager(extract_archives(abs_location, recurse=not shallow), item_show_func=extract_event,
                               verbose=verbose, quiet=quiet) as extraction_events:
        for xev in extraction_events:
            if xev.done and (xev.warnings or xev.errors):
                has_extract_errors = has_extract_errors or bool(xev.errors)
                extract_results.append(xev)

    if not quiet:
        display_extract_summary()

    rc = 1 if has_extract_errors else 0
    ctx.exit(rc)
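
# Minimal sketch of the summary logic in display_extract_summary() above:
# warnings raise the color to yellow, errors to red, and errors win. The
# ExtractEvent stand-in is hypothetical.
from collections import namedtuple

ExtractEvent = namedtuple('ExtractEvent', 'source done warnings errors')

def summary_color(events):
    has_warnings = any(ev.warnings for ev in events)
    has_errors = any(ev.errors for ev in events)
    if has_errors:
        return 'red'
    if has_warnings:
        return 'yellow'
    return 'green'

events = [ExtractEvent('a.zip', True, ['odd header'], []),
          ExtractEvent('b.tar', True, [], ['corrupt'])]
assert summary_color(events) == 'red'
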
Example #5
0
def scan(input_path,
         output_file,
         scanners,
         license_score=0, license_text=False,
         verbose=False, quiet=False,
         processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False):
    """
    Return a tuple of (files_count, scan_results) where
    scan_results is an iterable. Run each requested scan proper: each individual file
    scan is cached on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes
    get_licenses_with_score = partial(get_licenses, min_score=license_score, include_text=license_text, diag=diag)

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())
        save_logs('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals(), output_file)
    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: it does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
            save_logs('Building license detection index...', output_file)
        from licensedcode.index import get_index
        _idx = get_index()
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)
            save_logs('Done.', output_file)
    scan_summary['indexing_time'] = indexing_time

    # TODO: handle pickling errors as in ./scancode -cilp   samples/ -n3: note they are only caused by a FanoutCache
    # TODO: handle other exceptions properly to avoid any hanging

    # maxtasksperchild helps with recycling processes in case of leaks
    pool = get_pool(processes=processes, maxtasksperchild=1000)
    resources = resource_paths(input_path)
    logfile_path = scans_cache_class().cache_files_log
    with open(logfile_path, 'wb') as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout)

        try:
            # Using chunksize is documented as much more efficient in the Python doc.
            # Yet "1" still provides a better and more progressive feedback.
            # With imap_unordered, results are returned as soon as ready and out of order.
            scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
            pool.close()

            if not quiet:
                echo_stderr('Scanning files...', fg='green')
                save_logs('Scanning files...', output_file)

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet:
                    return ''
                if item:
                    _scan_success, _scanned_path = item
                    _progress_line = verbose and _scanned_path or fixed_width_file_name(_scanned_path)
                    save_logs('Scanned: ' + _progress_line, output_file)
                    return style('Scanned: ') + style(_progress_line, fg=_scan_success and 'green' or 'red')

            scanning_errors = []
            files_count = 0
            with utils.progressmanager(scanned_files, item_show_func=scan_event,
                                       show_pos=True, verbose=verbose, quiet=quiet,
                                       file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            scanning_errors.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        print('\nAborted with Ctrl+C!')
                        pool.terminate()
                        break
        finally:
            # ensure the pool is really dead to work around a Python 2.7.3 bug:
            # http://bugs.python.org/issue15101
            pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = scanning_errors
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg=scanning_errors and 'red' or 'green')
        save_logs('Scanning done.', output_file)
        if scanning_errors:
            echo_stderr('Some files failed to scan properly. See the scan results for details:', fg='red')
            save_logs('Some files failed to scan properly. See the scan results for details:', output_file)
            for errored_path in scanning_errors:
                echo_stderr(' ' + errored_path, fg='red')
                save_logs(' ' + errored_path, output_file)
        echo_stderr('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())
        filename = os.path.basename(output_file.name).rsplit('.', 1)[0]
        conn = sqlite3.connect('data.db')
        # bind values as parameters rather than interpolating them into the
        # SQL text: this avoids quoting bugs and SQL injection
        conn.execute('UPDATE scanhistory SET number=?, scantime=? WHERE id=?',
                     (files_count, int(total_time), filename))
        conn.commit()
        conn.close()
        save_logs('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals(), output_file)
        echo_stderr('Scan options:    %(_scans)s with %(processes)d process(es).' % locals())
        save_logs('Scan options:    %(_scans)s with %(processes)d process(es).' % locals(), output_file)
        echo_stderr('Scanning speed:  %(files_scanned_per_second)s files per sec.' % locals())
        save_logs('Scanning speed:  %(files_scanned_per_second)s files per sec.' % locals(), output_file)
        echo_stderr('Scanning time:   %(scanning_time)ds.' % locals())
        save_logs('Scanning time:   %(scanning_time)ds.' % locals(), output_file)
        echo_stderr('Indexing time:   %(indexing_time)ds.' % locals(), reset=True)
        save_logs('Indexing time:   %(indexing_time)ds.' % locals(), output_file)
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root)
    return files_count, cached_scan.iterate(scans, root_dir)
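
# Standalone sketch of the parameterized sqlite3 update used above, against an
# in-memory database with an illustrative schema. "?" placeholders let sqlite3
# bind the values itself instead of relying on string interpolation.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE scanhistory (id TEXT, number INT, scantime INT)')
conn.execute("INSERT INTO scanhistory VALUES ('report', 0, 0)")
conn.execute('UPDATE scanhistory SET number=?, scantime=? WHERE id=?',
             (42, 7, 'report'))
conn.commit()
print(conn.execute('SELECT * FROM scanhistory').fetchone())
conn.close()
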
Example #6
0
def scan(input_path,
         scanners,
         verbose=False, quiet=False,
         processes=1, timeout=DEFAULT_TIMEOUT,
         diag=False,
         scans_cache_class=None,
         strip_root=False,
         full_root=False,
         pre_scan_plugins=()):
    """
    Return a tuple of (files_count, scan_results, success) where
    scan_results is an iterable and success is a boolean.

    Run each requested scan proper: each individual file scan is cached
    on disk to free memory. Then the whole set of scans is loaded from
    the cache and streamed at the end.
    """
    assert scans_cache_class
    scan_summary = OrderedDict()
    scan_summary['scanned_path'] = input_path
    scan_summary['processes'] = processes

    # Display scan start details
    ############################
    # FIXME: it does not make sense to use tuple and positional values
    scans = [k for k, v in scanners.items() if v[0]]
    _scans = ', '.join(scans)
    if not quiet:
        echo_stderr('Scanning files for: %(_scans)s with %(processes)d process(es)...' % locals())

    scan_summary['scans'] = scans[:]
    scan_start = time()
    indexing_time = 0
    # FIXME: It does not make sense to use tuple and positional values
    with_licenses, _ = scanners.get('licenses', (False, ''))
    if with_licenses:
        # build index outside of the main loop for speed
        # this also ensures that forked processes will get the index on POSIX naturally
        if not quiet:
            echo_stderr('Building license detection index...', fg='green', nl=False)
        from licensedcode.cache import get_index
        get_index(False)
        indexing_time = time() - scan_start
        if not quiet:
            echo_stderr('Done.', fg='green', nl=True)

    scan_summary['indexing_time'] = indexing_time

    pool = None

    resources = resource_paths(input_path, diag, scans_cache_class, pre_scan_plugins=pre_scan_plugins)
    paths_with_error = []
    files_count = 0

    logfile_path = scans_cache_class().cache_files_log
    if on_linux:
        file_logger = partial(open, logfile_path, 'wb')
    else:
        file_logger = partial(codecs.open, logfile_path, 'w', encoding='utf-8')

    with file_logger() as logfile_fd:

        logged_resources = _resource_logger(logfile_fd, resources)

        scanit = partial(_scanit, scanners=scanners, scans_cache_class=scans_cache_class,
                         diag=diag, timeout=timeout, processes=processes)

        max_file_name_len = compute_fn_max_len()
        # do not display a file name in the progress bar if 10 or fewer characters are available.
        display_fn = bool(max_file_name_len > 10)
        try:
            if processes:
                # maxtasksperchild helps with recycling processes in case of leaks
                pool = get_pool(processes=processes, maxtasksperchild=1000)
                # Using chunksize is documented as much more efficient in the Python doc.
                # Yet "1" still provides a better and more progressive feedback.
                # With imap_unordered, results are returned as soon as ready and out of order.
                scanned_files = pool.imap_unordered(scanit, logged_resources, chunksize=1)
                pool.close()
            else:
                # no multiprocessing with processes=0
                scanned_files = imap(scanit, logged_resources)
                if not quiet:
                    echo_stderr('Disabling multi-processing and multi-threading...', fg='yellow')

            if not quiet:
                echo_stderr('Scanning files...', fg='green')

            def scan_event(item):
                """Progress event displayed each time a file is scanned"""
                if quiet or not item or not display_fn:
                    return ''
                _scan_success, _scanned_path = item
                _scanned_path = unicode(toascii(_scanned_path))
                if verbose:
                    _progress_line = _scanned_path
                else:
                    _progress_line = fixed_width_file_name(_scanned_path, max_file_name_len)
                return style('Scanned: ') + style(_progress_line, fg=_scan_success and 'green' or 'red')

            files_count = 0
            with progressmanager(
                scanned_files, item_show_func=scan_event, show_pos=True,
                verbose=verbose, quiet=quiet, file=sys.stderr) as scanned:
                while True:
                    try:
                        result = scanned.next()
                        scan_success, scanned_rel_path = result
                        if not scan_success:
                            paths_with_error.append(scanned_rel_path)
                        files_count += 1
                    except StopIteration:
                        break
                    except KeyboardInterrupt:
                        print('\nAborted with Ctrl+C!')
                        if pool:
                            pool.terminate()
                        break
        finally:
            if pool:
                # ensure the pool is really dead to work around a Python 2.7.3 bug:
                # http://bugs.python.org/issue15101
                pool.terminate()

    # TODO: add stats to results somehow

    # Compute stats
    ##########################
    scan_summary['files_count'] = files_count
    scan_summary['files_with_errors'] = paths_with_error
    total_time = time() - scan_start
    scanning_time = total_time - indexing_time
    scan_summary['total_time'] = total_time
    scan_summary['scanning_time'] = scanning_time

    files_scanned_per_second = round(float(files_count) / scanning_time, 2)
    scan_summary['files_scanned_per_second'] = files_scanned_per_second

    if not quiet:
        # Display stats
        ##########################
        echo_stderr('Scanning done.', fg=paths_with_error and 'red' or 'green')
        if paths_with_error:
            if diag:
                echo_stderr('Some files failed to scan properly:', fg='red')
                # iterate cached results to collect all scan errors
                cached_scan = scans_cache_class()
                root_dir = _get_root_dir(input_path, strip_root, full_root)
                scan_results = cached_scan.iterate(scans, root_dir, paths_subset=paths_with_error)
                for scan_result in scan_results:
                    errored_path = scan_result.get('path', '')
                    echo_stderr('Path: ' + errored_path, fg='red')
                    for error in scan_result.get('scan_errors', []):
                        for emsg in error.splitlines(False):
                            echo_stderr('  ' + emsg)
                    echo_stderr('')
            else:
                echo_stderr('Some files failed to scan properly. Use the --diag option for additional details:', fg='red')
                for errored_path in paths_with_error:
                    echo_stderr(' ' + errored_path, fg='red')

        echo_stderr('Scan statistics: %(files_count)d files scanned in %(total_time)ds.' % locals())
        echo_stderr('Scan options:    %(_scans)s with %(processes)d process(es).' % locals())
        echo_stderr('Scanning speed:  %(files_scanned_per_second)s files per sec.' % locals())
        echo_stderr('Scanning time:   %(scanning_time)ds.' % locals())
        echo_stderr('Indexing time:   %(indexing_time)ds.' % locals(), reset=True)

    success = not paths_with_error
    # finally return an iterator on cached results
    cached_scan = scans_cache_class()
    root_dir = _get_root_dir(input_path, strip_root, full_root)
    return files_count, cached_scan.iterate(scans, root_dir), success
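
# Sketch of the processes=0 fallback above: with multiprocessing disabled the
# same callable is driven through a plain lazy map, so the downstream progress
# loop is unchanged. itertools.imap is Python 2 only; on Python 3 the built-in
# map is already lazy. The scanit stand-in is hypothetical.
try:
    from itertools import imap  # Python 2
except ImportError:
    imap = map  # Python 3

def scanit(path):  # stand-in for the real partial(_scanit, ...)
    return True, path

for scan_success, rel_path in imap(scanit, ['a.txt', 'b.txt']):
    print('%s %s' % (scan_success, rel_path))
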
def extractcode(ctx, input, verbose, quiet, *args,
                **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper, as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """

    original_input = fileutils.as_posixpath(input)
    abs_input = fileutils.as_posixpath(
        os.path.abspath(os.path.expanduser(input)))

    # note: we inline functions so they can close over local variables

    def extract_start():
        return style('Extracting archives...', fg='green')

    def extract_event(item):
        """
        Display an extract event.
        """
        if not item:
            return ''
        if verbose:
            if item.done:
                return ''
            line = utils.get_relative_path(original_input, abs_input,
                                           as_posixpath(item.source)) or ''
        else:
            line = fileutils.file_name(item.source) or ''
        return 'Extracting: %(line)s' % locals()

    def extract_end():
        """
        Display a summary of warnings and errors if any.
        """
        has_warnings = False
        has_errors = False
        summary = []
        for xev in extract_results:
            has_errors = has_errors or bool(xev.errors)
            has_warnings = has_warnings or bool(xev.warnings)
            source = as_posixpath(xev.source)
            source = utils.get_relative_path(original_input, abs_input, source)
            for e in xev.errors:
                summary.append(
                    style('ERROR extracting: %(source)s: %(e)r' % locals(),
                          fg='red',
                          reset=False))
            for warn in xev.warnings:
                summary.append(
                    style('WARNING extracting: %(source)s: %(warn)r' %
                          locals(),
                          fg='yellow',
                          reset=False))

        summary_color = 'green'
        if has_warnings:
            summary_color = 'yellow'
        if has_errors:
            summary_color = 'red'

        summary.append(style('Extracting done.', fg=summary_color, reset=True))
        return '\n'.join(summary)

    extract_results = []
    has_extract_errors = False

    with utils.progressmanager(
            extract_archives(abs_input),
            item_show_func=extract_event,
            start_show_func=extract_start,
            finish_show_func=extract_end,
            verbose=verbose,
            quiet=quiet,
    ) as extraction_events:
        for xev in extraction_events:
            if xev.done and (xev.warnings or xev.errors):
                has_extract_errors = has_extract_errors or bool(xev.errors)
                extract_results.append(xev)

    rc = 1 if has_extract_errors else 0
    ctx.exit(rc)
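
# Minimal sketch of the event-collection loop above: only finished events that
# carry warnings or errors are kept, and any error forces exit code 1. The
# Event rows here are hypothetical.
from collections import namedtuple

Event = namedtuple('Event', 'source done warnings errors')
events = [Event('x.zip', False, [], []),       # in-progress: skipped
          Event('x.zip', True, ['note'], []),  # kept: warning only
          Event('y.tgz', True, [], ['bad'])]   # kept: error

extract_results = [ev for ev in events if ev.done and (ev.warnings or ev.errors)]
has_extract_errors = any(ev.errors for ev in extract_results)
rc = 1 if has_extract_errors else 0
print('%d %d' % (len(extract_results), rc))  # 2 1
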
Example #8
0
def scan(
        input_path,
        copyright=True,
        license=True,
        package=True,  # @ReservedAssignment
        info=True,
        verbose=False,
        quiet=False):  # @ReservedAssignment
    """
    Do the scans proper, return results.
    """
    # save paths to report paths relative to the original input
    original_input = fileutils.as_posixpath(input_path)
    abs_input = fileutils.as_posixpath(
        os.path.abspath(os.path.expanduser(input_path)))

    # note: "flag and function" expressions return the function if flag is True
    scanners = {
        'copyrights': copyright and get_copyrights,
        'licenses': license and get_licenses,
        'packages': package and get_package_infos,
        'infos': info and get_file_infos,
    }

    results = []

    # note: we inline progress display functions to close over some args

    def scan_start():
        """Progress event displayed at start of scan"""
        return style('Scanning files...', fg='green')

    def scan_event(item):
        """Progress event displayed each time a file is scanned"""
        if item:
            line = verbose and item or fileutils.file_name(item) or ''
            return 'Scanning: %(line)s' % locals()

    def scan_end():
        """Progress event displayed at end of scan"""
        # placeholder flags: warning/error tracking is not wired up here
        has_warnings = False
        has_errors = False
        summary = []
        summary_color = 'green'
        summary_color = has_warnings and 'yellow' or summary_color
        summary_color = has_errors and 'red' or summary_color
        summary.append(style('Scanning done.', fg=summary_color, reset=True))
        return '\n'.join(summary)

    ignored = partial(ignore.is_ignored,
                      ignores=ignore.ignores_VCS,
                      unignores={})
    resources = fileutils.resource_iter(abs_input, ignored=ignored)

    with utils.progressmanager(resources,
                               item_show_func=scan_event,
                               start_show_func=scan_start,
                               finish_show_func=scan_end,
                               verbose=verbose,
                               show_pos=True,
                               quiet=quiet) as progressive_resources:

        for resource in progressive_resources:
            res = fileutils.as_posixpath(resource)

            # fix paths: keep the location as relative to the original input
            relative_path = utils.get_relative_path(original_input, abs_input,
                                                    res)
            scan_result = OrderedDict(location=relative_path)
            # Should we yield instead?
            scan_result.update(scan_one(res, scanners))
            results.append(scan_result)

    # TODO: eventually merge scans for the same resource location...
    # TODO: fix absolute paths as relative to original input argument...

    return results
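
# Sketch of how each per-file result row is assembled above: the location key
# is inserted first so it leads the ordered output, then per-scanner results
# are merged in. scan_one here is a hypothetical stand-in for the real helper.
from collections import OrderedDict

def scan_one(location, scanners):
    # run each enabled scanner and key its output by scanner name
    return OrderedDict((name, func(location))
                       for name, func in scanners.items() if func)

scanners = {'infos': lambda loc: {'size': 123}, 'licenses': None}
scan_result = OrderedDict(location='samples/zlib/zlib.h')
scan_result.update(scan_one('samples/zlib/zlib.h', scanners))
print(scan_result)
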
def extractcode(ctx, input, verbose, quiet, *args, **kwargs):  # @ReservedAssignment
    """extract archives and compressed files found in the <input> file or directory tree.

    Use this command before scanning proper, as an <input> preparation step.
    Archives found inside an extracted archive are extracted recursively.
    Extraction is done in-place in a directory named '-extract' side-by-side with an archive.
    """

    original_input = fileutils.as_posixpath(input)
    abs_input = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input)))

    # note: we inline functions so they can close over local variables

    def extract_start():
        return style('Extracting archives...', fg='green')


    def extract_event(item):
        """
        Display an extract event.
        """
        if not item:
            return ''
        if verbose:
            if item.done:
                return ''
            line = utils.get_relative_path(original_input, abs_input, as_posixpath(item.source)) or ''
        else:
            line = fileutils.file_name(item.source) or ''
        return 'Extracting: %(line)s' % locals()


    def extract_end():
        """
        Display a summary of warnings and errors if any.
        """
        has_warnings = False
        has_errors = False
        summary = []
        for xev in extract_results:
            has_errors = has_errors or bool(xev.errors)
            has_warnings = has_warnings or bool(xev.warnings)
            source = as_posixpath(xev.source)
            source = utils.get_relative_path(original_input, abs_input, source)
            for e in xev.errors:
                summary.append(style('ERROR extracting: %(source)s: %(e)r' % locals(), fg='red', reset=False))
            for warn in xev.warnings:
                summary.append(style('WARNING extracting: %(source)s: %(warn)r' % locals(), fg='yellow', reset=False))

        summary_color = 'green'
        if has_warnings:
            summary_color = 'yellow'
        if has_errors:
            summary_color = 'red'

        summary.append(style('Extracting done.', fg=summary_color, reset=True))
        return '\n'.join(summary)


    extract_results = []
    has_extract_errors = False

    with utils.progressmanager(extract_archives(abs_input),
                               item_show_func=extract_event,
                               start_show_func=extract_start,
                               finish_show_func=extract_end,
                               verbose=verbose,
                               quiet=quiet,
                               ) as extraction_events:
        for xev in extraction_events:
            if xev.done and (xev.warnings or xev.errors):
                has_extract_errors = has_extract_errors or bool(xev.errors)
                extract_results.append(xev)

    rc = 1 if has_extract_errors else 0
    ctx.exit(rc)
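
# Sketch of the exit-code convention used by extractcode above, as a bare
# click command (hypothetical body): click's ctx.exit() raises SystemExit
# with the given code, so shells see 1 on any extraction error.
import click

@click.command()
@click.argument('input', type=click.Path(exists=True))
@click.pass_context
def extractdemo(ctx, input):  # @ReservedAssignment
    has_extract_errors = False  # would be set by the real extraction loop
    ctx.exit(1 if has_extract_errors else 0)

if __name__ == '__main__':
    extractdemo()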