Code example #1
def test_extractcode_command_can_extract_archive_with_unicode_names(
        monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('unicodearch', copy=True)
    if on_linux:
        test_dir = path_to_bytes(test_dir)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, [test_dir],
                           catch_exceptions=False)
    assert result.exit_code == 0

    uni_arch = b'unicodepath.tgz' if on_linux else 'unicodepath.tgz'
    uni_path = b'/unicodepath/' if on_linux else '/unicodepath/'

    file_result = [
        f for f in map(as_posixpath, file_iter(test_dir))
        if not f.endswith(uni_arch)
    ]
    file_result = [
        EMPTY_STRING.join(f.partition(uni_path)[1:]) for f in file_result
    ]
    file_result = [f for f in file_result if f]
    expected = [
        '/unicodepath/Ho_', '/unicodepath/Ho_a',
        '/unicodepath/koristenjem_Karkkainen_-_Sander.pdf'
    ]
    assert sorted(expected) == sorted(file_result)
Code example #2
    def test_extract_option_works_with_relative_paths(self):
        # The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, ie the scancode-toolkit/ dir
        # To use relative paths, we use our tmp dir at the root of the code
        from os.path import dirname, join, abspath
        from commoncode import fileutils
        import extractcode
        import tempfile
        import shutil

        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(
            scancode_root_abs, '').strip('\\/')
        test_file = self.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root,
                            test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        result = runner.invoke(cli.scancode, ['--extract', test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [
            as_posixpath(f.replace(test_tgt_dir, ''))
            for f in fileutils.file_iter(test_tgt_dir)
        ]
        assert sorted(expected) == sorted(file_result)
Code example #3
def extracted_files(location):
    """
    Yield the locations of extracted files in a directory location.
    """
    assert location
    logger.debug('extracted_files for: %(location)r' % locals())
    return fileutils.file_iter(location)
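For context, `fileutils.file_iter` walks a directory tree and yields the location of each regular file it contains; `extracted_files` above only adds a sanity check and a debug log on top of it. A minimal, hypothetical usage sketch (the directory path is made up):

from commoncode import fileutils

# Hypothetical directory: every regular file below it is yielded, one path at a time.
for path in fileutils.file_iter('/tmp/some-archive-extract'):
    print(path)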
Code example #4
File: test_cli.py Project: K-Rex/scancode-toolkit
    def test_extract_option_works_with_relative_paths(self):
        # The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, ie the scancode-toolkit/ dir
        # To use relative paths, we use our tmp dir at the root of the code
        from os.path import dirname, join, abspath
        from commoncode import fileutils
        import extractcode
        import tempfile
        import shutil

        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = self.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        result = runner.invoke(cli.scancode, ['--extract', test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
        assert sorted(expected) == sorted(file_result)
Code example #5
def extracted_files(location):
    """
    Yield the locations of extracted files in a directory location.
    """
    assert location
    logger.debug('extracted_files for: %(location)r' % locals())
    return fileutils.file_iter(location)
Code example #6
def tree_checksum(base_dir=src_dir):
    """
    Return a checksum computed from a file tree using the file paths, size and
    modification time stamps.
    """
    hashable = [''.join([loc, str(getmtime(loc)), str(getsize(loc))]) for loc in file_iter(base_dir)]
    return md5(''.join(hashable)).hexdigest()
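Note that this snippet feeds a `str` to `md5()`, which only works on Python 2; on Python 3 the concatenated string must be encoded to bytes first. A minimal sketch of the same idea under that assumption (the import location for `file_iter` is assumed):

from hashlib import md5
from os.path import getmtime, getsize
from commoncode.fileutils import file_iter  # assumed import location

def tree_checksum_py3(base_dir):
    # Same cheap change detector: hash every file path together with its
    # modification time and size, then digest the concatenation.
    hashable = ''.join(
        loc + str(getmtime(loc)) + str(getsize(loc)) for loc in file_iter(base_dir))
    return md5(hashable.encode('utf-8')).hexdigest()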
Code example #7
def test_scan_works_with_multiple_processes_and_timeouts():
    # this contains test files with a lot of copyrights that should
    # take more than the timeout to scan
    test_dir = test_env.get_test_loc('timeout', copy=True)
    # add some random bytes to the test files to ensure that the license results will
    # not be cached
    import time, random
    for tf in fileutils.file_iter(test_dir):
        with open(tf, 'ab') as tfh:
            tfh.write('(c)' + str(time.time()) + repr([random.randint(0, 10 ** 6) for _ in range(10000)]) + '(c)')

    result_file = test_env.get_temp_file('json')

    result = run_scan_click(
        ['--copyright', '--processes', '2',
         '--timeout', '0.000001',
         '--strip-root', '--format', 'json', test_dir, result_file],
    )

    assert result.exit_code == 1
    assert 'Scanning done' in result.output
    expected = [
        [(u'path', u'test1.txt'), (u'scan_errors', [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
        [(u'path', u'test2.txt'), (u'scan_errors', [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
        [(u'path', u'test3.txt'), (u'scan_errors', [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
    ]

    result_json = json.loads(open(result_file).read(), object_pairs_hook=OrderedDict)
    assert sorted(expected) == sorted(x.items() for x in result_json['files'])
Code example #8
    def test_is_pom_m2(self):
        test_dir = self.get_test_loc('m2')
        for test_file in fileutils.file_iter(test_dir):
            if test_file.endswith('.json'):
                continue

            loc = os.path.join(test_dir, test_file)
            assert maven.is_pom(loc), 'file://' + loc + ' should be a POM'
Code example #9
    def test_is_pom_m2(self):
        test_dir = self.get_test_loc('m2')
        for test_file in fileutils.file_iter(test_dir):
            if test_file.endswith('.json'):
                continue

            loc = os.path.join(test_dir, test_file)
            assert maven.is_pom(loc), 'file://' + loc + ' should be a POM'
Code example #10
    def test_file_iter_can_walk_non_utf8_path_from_unicode_path(self):
        test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz')
        test_dir = join(test_dir, 'non_unicode')

        if not on_linux:
            test_dir = unicode(test_dir)
        result = list(fileutils.file_iter(test_dir))
        assert 18 == len(result)
Code example #11
    def test_file_iter(self):
        test_dir = self.get_test_loc('fileutils/walk')
        base = self.get_test_loc('fileutils')
        result = [as_posixpath(f.replace(base, '')) for f in fileutils.file_iter(test_dir)]
        expected = [
            '/walk/f',
            '/walk/unicode.zip',
            '/walk/d1/f1',
            '/walk/d1/d2/f2',
            '/walk/d1/d2/d3/f3'
        ]
        assert sorted(expected) == sorted(result)
Code example #12
File: cache.py Project: pombredanne/findlicense
def tree_checksum(base_dir=src_dir, ignored=_ignored_from_hash):
    """
    Return a checksum computed from a file tree using the file paths, size and
    last modified time stamps.

    The purpose is to detect if there has been any modification to source code,
    compiled code or licenses or rule files and use this as a proxy to verify the
    cache consistency.
    """
    hashable = [''.join([loc, str(getmtime(loc)), str(getsize(loc))])
                for loc in file_iter(base_dir, ignored=ignored)]
    return md5(''.join(hashable)).hexdigest()
Code example #13
File: cache.py Project: ocabrisses/scancode-toolkit
def tree_checksum(tree_base_dir=src_dir, _ignored=_ignored_from_hash):
    """
    Return a checksum computed from a file tree using the file paths,
    size and last modified time stamps.
    The purpose is to detect if there has been any modification to
    source code or data files and use this as a proxy to verify the
    cache consistency.

    NOTE: this is not 100% fool proof but good enough in practice.
    """
    hashable = (pth + str(getmtime(pth)) + str(getsize(pth))
                for pth in file_iter(tree_base_dir, ignored=_ignored))
    return md5(''.join(sorted(hashable))).hexdigest()
Code example #14
def tree_checksum(tree_base_dir=src_dir, _ignored=_ignored_from_hash):
    """
    Return a checksum computed from a file tree using the file paths,
    size and last modified time stamps.
    The purpose is to detect if there has been any modification to
    source code or data files and use this as a proxy to verify the
    cache consistency.

    NOTE: this is not 100% fool proof but good enough in practice.
    """
    hashable = (pth + str(getmtime(pth)) + str(getsize(pth))
                for pth in file_iter(tree_base_dir, ignored=_ignored))
    return md5(''.join(sorted(hashable))).hexdigest()
Code example #15
File: image_v11.py Project: hv7214/conan
    def populate(self, base_dir):
        """
        Collect the `base_dir` for image repositories.
        """
        for fil in fileutils.file_iter(base_dir):
            # FIXME: we are only looking at V11 repos for now.
            fn = fileutils.file_name(fil)
            if not fn == MANIFEST_JSON_FILE:
                continue
            rd = parent_directory(fil)
            repo = Repository()
            repo.load_manifest(rd)
            logger_debug('populate: path: %(fn)r' % locals())
            self.repositories[rd] = repo
Code example #16
def load_licenses(licenses_data_dir=licenses_data_dir, with_deprecated=False):
    """
    Return a mapping of key -> license objects, loaded from license files.
    """
    licenses = {}
    for data_file in file_iter(licenses_data_dir):
        if not data_file.endswith('.yml'):
            continue
        key = file_base_name(data_file)
        lic = License(key, licenses_data_dir)
        if not with_deprecated and lic.is_deprecated:
            continue
        licenses[key] = lic
    return licenses
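A minimal usage sketch for the loader above; the 'mit' key is only illustrative and assumed to exist in the license data directory:

# Load every non-deprecated license keyed by its license key, then look one up.
licenses = load_licenses()
mit_license = licenses.get('mit')  # hypothetical key, may not exist in a given data dir
if mit_license is None:
    print('no license with that key')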
Code example #17
File: models.py Project: ocabrisses/scancode-toolkit
def load_licenses(licenses_data_dir=licenses_data_dir, with_deprecated=False):
    """
    Return a mapping of key -> license objects, loaded from license files.
    """
    licenses = {}
    for data_file in file_iter(licenses_data_dir):
        if not data_file.endswith('.yml'):
            continue
        key = file_base_name(data_file)
        lic = License(key, licenses_data_dir)
        if not with_deprecated and lic.is_deprecated:
            continue
        licenses[key] = lic
    return licenses
Code example #18
def load_rules(rules_data_dir=rules_data_dir, load_notes=False):
    """
    Return an iterable of rules loaded from rule files.
    """
    # TODO: OPTIMIZE: create a graph of rules to account for containment and similarity clusters?
    # TODO: we should assign the rule id at that stage
    seen_files = set()
    processed_files = set()
    lower_case_files = set()
    case_problems = set()
    for data_file in file_iter(rules_data_dir):
        if data_file.endswith('.yml'):
            base_name = file_base_name(data_file)
            rule_file = join(rules_data_dir, base_name + '.RULE')
            yield Rule(data_file=data_file,
                       text_file=rule_file,
                       load_notes=load_notes)

            # accumulate sets to ensure we do not have illegal names or extra
            # orphaned files
            data_lower = data_file.lower()
            if data_lower in lower_case_files:
                case_problems.add(data_lower)
            else:
                lower_case_files.add(data_lower)

            rule_lower = rule_file.lower()
            if rule_lower in lower_case_files:
                case_problems.add(rule_lower)
            else:
                lower_case_files.add(rule_lower)

            processed_files.update([data_file, rule_file])

        if not data_file.endswith('~'):
            seen_files.add(data_file)

    unknown_files = seen_files - processed_files
    if unknown_files or case_problems:
        msg = ''
        if unknown_files:
            files = '\n'.join(sorted(unknown_files))
            msg += 'Orphaned files in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        if case_problems:
            files = '\n'.join(sorted(case_problems))
            msg += '\nRule files with non-unique name ignoring case in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        raise Exception(msg)
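Since `load_rules` is a generator, the orphaned-file and case-conflict checks at the end only run once the iterator has been fully consumed; a minimal usage sketch:

# Materializing the generator yields all Rule objects and also triggers the
# final validation pass that raises on orphaned or case-conflicting rule files.
rules = list(load_rules())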
Code example #19
File: test_rootfs.py Project: hv7214/conan
    def test_rebuild_rootfs_format_v10_with_delete(self):
        test_dir = self.extract_test_tar('docker/v10_format/busybox2.tgz')
        image = image_v10.ImageV10(test_dir)
        target_dir = self.get_temp_dir()
        rebuild_rootfs(image, target_dir)
        expected = [
            '/lib/librt-0.9.33.2.so',
            '/lib/libgcc_s.so.1',
            '/lib/libutil-0.9.33.2.so',
            '/lib/libuClibc-0.9.33.2.so',
            '/lib/libm-0.9.33.2.so',
            '/lib/libresolv-0.9.33.2.so',
            '/lib/libnsl-0.9.33.2.so',
            '/lib/libpthread-0.9.33.2.so'
        ]
        assert sorted(expected) == sorted(f.replace(target_dir, '') for f in fileutils.file_iter(target_dir))
Code example #20
def test_extractcode_command_can_extract_shallow(monkeypatch):
    test_dir = test_env.get_test_loc('extract_shallow', copy=True)
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, ['--shallow', test_dir])
    assert result.exit_code == 0
    file_result = [f for f in map(as_posixpath, file_iter(test_dir)) if not f.endswith('unicodepath.tgz')]
    file_result = [''.join(f.partition('/top.zip-extract/')[1:]) for f in file_result]
    file_result = [f for f in file_result if f]
    # this checks that the zip files inside top.zip are not extracted
    expected = [
        '/top.zip-extract/some3.zip',
        '/top.zip-extract/some2.zip',
        '/top.zip-extract/some1.zip',
    ]
    assert sorted(expected) == sorted(file_result)
Code example #21
File: models.py Project: ocabrisses/scancode-toolkit
def load_rules(rules_data_dir=rules_data_dir, load_notes=False):
    """
    Return an iterable of rules loaded from rule files.
    """
    # TODO: OPTIMIZE: create a graph of rules to account for containment and similarity clusters?
    # TODO: we should assign the rule id at that stage
    seen_files = set()
    processed_files = set()
    lower_case_files = set()
    case_problems = set()
    for data_file in file_iter(rules_data_dir):
        if data_file.endswith('.yml'):
            base_name = file_base_name(data_file)
            rule_file = join(rules_data_dir, base_name + '.RULE')
            yield Rule(data_file=data_file, text_file=rule_file,
                       load_notes=load_notes)

            # accumulate sets to ensure we do not have illegal names or extra
            # orphaned files
            data_lower = data_file.lower()
            if data_lower in lower_case_files:
                case_problems.add(data_lower)
            else:
                lower_case_files.add(data_lower)

            rule_lower = rule_file.lower()
            if rule_lower in lower_case_files:
                case_problems.add(rule_lower)
            else:
                lower_case_files.add(rule_lower)

            processed_files.update([data_file, rule_file])

        if not data_file.endswith('~'):
            seen_files.add(data_file)

    unknown_files = seen_files - processed_files
    if unknown_files or case_problems:
        msg = ''
        if unknown_files:
            files = '\n'.join(sorted(unknown_files))
            msg += 'Orphaned files in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        if case_problems:
            files = '\n'.join(sorted(case_problems))
            msg += '\nRule files with non-unique name ignoring case in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        raise Exception(msg)
Code example #22
def test_extractcode_command_can_extract_shallow(monkeypatch):
    test_dir = test_env.get_test_loc('extract_shallow', copy=True)
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, ['--shallow', test_dir])
    assert result.exit_code == 0
    file_result = [
        f for f in map(as_posixpath, file_iter(test_dir))
        if not f.endswith('unicodepath.tgz')
    ]
    file_result = [
        ''.join(f.partition('/top.zip-extract/')[1:]) for f in file_result
    ]
    file_result = [f for f in file_result if f]
    # this checks that the zip files inside top.zip are not extracted
    expected = [
        '/top.zip-extract/some3.zip',
        '/top.zip-extract/some2.zip',
        '/top.zip-extract/some1.zip',
    ]
    assert sorted(expected) == sorted(file_result)
Code example #23
def test_extractcode_command_can_extract_archive_with_unicode_names(monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('unicodearch', copy=True)
    if on_linux:
        test_dir = path_to_bytes(test_dir)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, [test_dir], catch_exceptions=False)
    assert result.exit_code == 0

    uni_arch = b'unicodepath.tgz' if on_linux else 'unicodepath.tgz'
    uni_path = b'/unicodepath/' if on_linux else '/unicodepath/'

    file_result = [f for f in map(as_posixpath, file_iter(test_dir)) if not f.endswith(uni_arch)]
    file_result = [EMPTY_STRING.join(f.partition(uni_path)[1:]) for f in file_result]
    file_result = [f for f in file_result if f]
    expected = [
        '/unicodepath/Ho_',
        '/unicodepath/Ho_a',
        '/unicodepath/koristenjem_Karkkainen_-_Sander.pdf'
    ]
    assert sorted(expected) == sorted(file_result)
Code example #24
File: archive.py Project: yudhik11/scancode-toolkit
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux:
        location = path_to_bytes(location)
        target_dir = path_to_bytes(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = unicode(os.path.abspath(os.path.expanduser(target_dir)))
    # first extract the intermediate payload to a temp dir
    temp_target = unicode(fileutils.get_temp_dir('extract'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # extract this intermediate payload to the final target_dir
    try:
        inner_archives = list(fileutils.file_iter(temp_target))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
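A hypothetical call is sketched below; the two extractor callables and the paths are placeholders standing in for whatever single-step extractors the caller already has, not actual extractcode function names:

# Placeholder extractors: e.g. first decompress the outer gzip stream, then
# unpack the inner tar payload into the final target directory.
warnings = extract_twice(
    location='/tmp/sample.tar.gz',
    target_dir='/tmp/sample.tar.gz-extract',
    extractor1=decompress_outer,  # hypothetical callable(location, target_dir)
    extractor2=unpack_inner,  # hypothetical callable(location, target_dir)
)
for warning in warnings:
    print(warning)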
Code example #25
def test_extractcode_command_can_extract_archive_with_unicode_names_verbose(
        monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('unicodearch', copy=True)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, ['--verbose', test_dir],
                           catch_exceptions=False)
    assert result.exit_code == 0

    assert 'Sanders' in result.output
    file_result = [
        f for f in map(as_posixpath, file_iter(test_dir))
        if not f.endswith('unicodepath.tgz')
    ]
    file_result = [
        ''.join(f.partition('/unicodepath/')[1:]) for f in file_result
    ]
    file_result = [f for f in file_result if f]
    expected = [
        '/unicodepath/Ho_', '/unicodepath/Ho_a',
        '/unicodepath/koristenjem_Karkkainen_-_Sander.pdf'
    ]
    assert sorted(expected) == sorted(file_result)
Code example #26
def test_scan_works_with_multiple_processes_and_timeouts():
    # this contains test files with a lot of copyrights that should
    # take more than the timeout to scan
    test_dir = test_env.get_test_loc('timeout', copy=True)
    # add some random bytes to the test files to ensure that the license results will
    # not be cached
    import time, random
    for tf in fileutils.file_iter(test_dir):
        with open(tf, 'ab') as tfh:
            tfh.write('(c)' + str(time.time()) +
                      repr([random.randint(0, 10**6)
                            for _ in range(10000)]) + '(c)')

    result_file = test_env.get_temp_file('json')

    result = run_scan_click([
        '--copyright', '--processes', '2', '--timeout', '0.000001',
        '--strip-root', '--format', 'json', test_dir, result_file
    ])

    assert result.exit_code == 1
    assert 'Scanning done' in result.output
    expected = [
        [(u'path', u'test1.txt'),
         (u'scan_errors',
          [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
        [(u'path', u'test2.txt'),
         (u'scan_errors',
          [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
        [(u'path', u'test3.txt'),
         (u'scan_errors',
          [u'ERROR: Processing interrupted: timeout after 0 seconds.'])],
    ]

    result_json = json.loads(open(result_file).read(),
                             object_pairs_hook=OrderedDict)
    assert sorted(expected) == sorted(x.items() for x in result_json['files'])
Code example #27
File: cli.py Project: retrography/scancode-toolkit
def scancode(ctx, input, output_file, extract, copyright, license, format, verbose, *args, **kwargs):
    """scan the <input> file or directory for origin and license and save results to the <output_file>.

    The scan results are printed on terminal if <output_file> is not provided.
    """
    abs_input = os.path.abspath(os.path.expanduser(input))
    scans = [copyright, license]
    if extract:
        if any(scans):
            # exclusive, ignoring other options.
            # FIXME: this should be turned into a sub-command
            ctx.fail('''The '--extract' option cannot be combined with other scanning options.
Use the '--extract' option alone to extract archives found in <input>,
then run scancode again to scan the extracted files.''')
            ctx.exit(1)

        click.secho('Extracting archives...', fg='green')
        extract_with_progress(abs_input, verbose)
        click.secho('Extracting done.', fg='green')
        return

    # Default scan when no options are provided
    if not any(scans):
        copyright = True
        license = True

    if copyright or license:
        click.secho('Scanning files...', fg='green')
        results = []

        ignored = partial(ignore.is_ignored, ignores=ignore.ignores_VCS, unignores={})
        files = file_iter(abs_input, ignored=ignored)

        if not verbose:
            # only display a progress bar
            with click.progressbar(files, show_pos=True) as files:
                for input_file in files:
                    results.append(scan_one(input_file, copyright, license, verbose))
        else:
            for input_file in files:
                results.append(scan_one(input_file, copyright, license, verbose))

        if format == 'html':
            output_file.write(as_html(results))

        elif format == 'html-app':
            output_file.write(as_html_app(results, input, output_file))
            try:
                create_html_app_assets(output_file)
            except HtmlAppAssetCopyWarning:
                click.secho('\nHTML app creation skipped when printing to terminal.',
                            fg='yellow')
            except HtmlAppAssetCopyError:
                click.secho('\nFailed to create HTML app.', fg='red')

        elif format == 'json':
            meta = {
                'count': len(results),
                'notice': acknowledgment_text_json,
                'results': results,
                'version': version,
            }
            output_file.write(json.dumps(meta, indent=2, sort_keys=True))
        else:
            # This should never happen by construction
            raise Exception('Unknown format: ' + repr(format))
        click.secho('Scanning done.', fg='green')
Code example #28
    def test_file_iter(self):
        test_dir = self.get_test_loc("fileutils/walk")
        base = self.get_test_loc("fileutils")
        result = [as_posixpath(f.replace(base, "")) for f in fileutils.file_iter(test_dir)]
        expected = ["/walk/f", "/walk/unicode.zip", "/walk/d1/f1", "/walk/d1/d2/f2", "/walk/d1/d2/d3/f3"]
        assert expected == result
Code example #29
    def test_file_iter_can_walk_an_empty_dir(self):
        test_dir = self.get_temp_dir()
        result = list(fileutils.file_iter(test_dir))
        expected = []
        assert expected == result
Code example #30
    def test_file_iter_can_iterate_a_single_file(self):
        test_file = self.get_test_loc('fileutils/walk/f')
        result = [as_posixpath(f) for f in fileutils.file_iter(test_file)]
        expected = [as_posixpath(test_file)]
        assert expected == result
Code example #31
    def test_some_media_do_not_yield_text_lines(self):
        test_dir = self.get_test_loc('media_without_text')
        for test_file in file_iter(test_dir):
            result = list(text_lines(test_file))
            assert [] == result, 'Should not return text lines:' + test_file
Code example #32
    def test_some_media_do_yield_text_lines(self):
        test_dir = self.get_test_loc('media_with_text')
        for test_file in file_iter(test_dir):
            result = list(text_lines(test_file))
            assert result, 'Should return text lines:' + test_file
            assert any('nexb' in l for l in result)
Code example #33
def scancode(ctx, input, output_file, extract, copyright, license, format,
             verbose, *args, **kwargs):  # @ReservedAssignment
    """scan the <input> file or directory for origin and license and save results to the <output_file>.

    The scan results are printed on terminal if <output_file> is not provided.
    """
    abs_input = os.path.abspath(os.path.expanduser(input))
    scans = [copyright, license]
    if extract:
        if any(scans):
            # exclusive, ignoring other options.
            # FIXME: this should be turned into a sub-command
            ctx.fail(
                '''The '--extract' option cannot be combined with other scanning options.
Use the '--extract' option alone to extract archives found in <input>,
then run scancode again to scan the extracted files.''')
            ctx.exit(1)

        click.secho('Extracting archives...', fg='green')
        extract_with_progress(abs_input, verbose)
        click.secho('Extracting done.', fg='green')
        return

    # Default scan when no options are provided
    if not any(scans):
        copyright = True  # @ReservedAssignment
        license = True  # @ReservedAssignment

    if copyright or license:
        click.secho('Scanning files...', fg='green')
        results = []

        ignored = partial(ignore.is_ignored,
                          ignores=ignore.ignores_VCS,
                          unignores={})
        files = file_iter(abs_input, ignored=ignored)

        if not verbose:
            # only display a progress bar
            with click.progressbar(files, show_pos=True) as files:
                for input_file in files:
                    results.append(
                        scan_one(input_file, copyright, license, verbose))
        else:
            for input_file in files:
                results.append(
                    scan_one(input_file, copyright, license, verbose))

        if format == 'html':
            output_file.write(as_html(results))

        elif format == 'html-app':
            output_file.write(as_html_app(results, input, output_file))
            try:
                create_html_app_assets(output_file)
            except HtmlAppAssetCopyWarning:
                click.secho(
                    '\nHTML app creation skipped when printing to terminal.',
                    fg='yellow')
            except HtmlAppAssetCopyError:
                click.secho('\nFailed to create HTML app.', fg='red')

        elif format == 'json':
            meta = {
                'count': len(results),
                'notice': acknowledgment_text_json,
                'results': results,
                'version': version,
            }
            output_file.write(json.dumps(meta, indent=2, sort_keys=True))
        else:
            # This should never happen by construction
            raise Exception('Unknown format: ' + repr(format))
        click.secho('Scanning done.', fg='green')