Example #1
    def test_file_name_are_not_too_long(self):

        # See https://unix.stackexchange.com/questions/32795/what-is-the-maximum-allowed-filename-and-folder-size-with-ecryptfs
        # 143 is the max filename length that luks and ecryptfs support!

        long_filenames = [
            r for r in resource_iter('src') if len(r.split('/')[-1]) > 143
        ]
        long_filenames.extend(r for r in resource_iter('tests')
                              if len(r.split('/')[-1]) > 143)
        if long_filenames:
            msg = '\n'.join(long_filenames)
            raise Exception(
                f'These filenames are too long (over 143 characters):\n{msg}')
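All of these examples revolve around `resource_iter`, which walks a file tree and yields paths. A minimal usage sketch, assuming it is imported from ScanCode's `commoncode.fileutils` (as several examples below do) and run against a hypothetical `src` directory:

from commoncode.fileutils import resource_iter

# Collect only file paths, skipping directories, as most examples below do.
file_paths = list(resource_iter('src', with_dirs=False))

# Filter on the file name alone, mirroring the filename-length test above.
long_names = [p for p in file_paths if len(p.split('/')[-1]) > 143]
print(long_names)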
Example #2
def load_licenses(licenses_data_dir=licenses_data_dir, with_deprecated=False):
    """
    Return a mapping of key -> license objects, loaded from license files.
    Raise Exceptions if there are dangling orphaned files.
    """
    licenses = {}
    used_files = set()
    all_files = set(resource_iter(licenses_data_dir, ignored=ignore_editor_tmp_files, with_dirs=False))
    for data_file in sorted(all_files):
        if data_file.endswith('.yml'):
            key = file_base_name(data_file)
            lic = License(key, licenses_data_dir)
            used_files.add(data_file)
            if exists(lic.text_file):
                used_files.add(lic.text_file)
            if not with_deprecated and lic.is_deprecated:
                continue
            licenses[key] = lic

    dangling = all_files.difference(used_files)
    if dangling:
        msg = 'Some License data or text files are orphaned in "{}".\n'.format(licenses_data_dir)
        msg += '\n'.join('file://{}'.format(f) for f in sorted(dangling))
        raise Exception(msg)
    return licenses
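A minimal consumption sketch for the loader above, assuming the module-level `licenses_data_dir` default and the `License` attributes used in the function; the printed fields are illustrative only:

# Hypothetical usage: load all non-deprecated licenses and list a few keys.
licenses = load_licenses()
print(len(licenses), 'licenses loaded')
for key in sorted(licenses)[:5]:
    print(key, licenses[key].text_file)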
Example #3
def resource_paths(base_path, diag, scans_cache_class, pre_scan_plugins=()):
    """
    Yield `Resource` objects for all the files found at base_path
    (either a directory or file) given an absolute base_path. Only yield
    Files, not directories.
    absolute path is a native OS path.
    base_path-relative path is a POSIX path.

    The relative path is guaranteed to be unicode and may be URL-encoded and may not
    be suitable to address an actual file.
    """
    if base_path:
        if on_linux:
            base_path = path_to_bytes(base_path)
        else:
            base_path = path_to_unicode(base_path)

    base_path = os.path.abspath(os.path.normpath(os.path.expanduser(base_path)))
    base_is_dir = filetype.is_dir(base_path)
    len_base_path = len(base_path)
    ignores = {}
    if pre_scan_plugins:
        for plugin in pre_scan_plugins:
            ignores.update(plugin.get_ignores())
    ignores.update(ignore.ignores_VCS)

    ignorer = build_ignorer(ignores, unignores={})
    resources = fileutils.resource_iter(base_path, ignored=ignorer)

    for abs_path in resources:
        resource = Resource(scans_cache_class, abs_path, base_is_dir, len_base_path)
        # always fetch infos and cache.
        resource.put_info(scan_infos(abs_path, diag=diag))
        yield resource
Example #4
def create_or_update_release_and_upload_directory(user,
                                                  repo,
                                                  tag_name,
                                                  token,
                                                  directory,
                                                  retry_limit=10,
                                                  description=None):
    """
    Create or update a GitHub release at https://github.com/<user>/<repo> for
    `tag_name` tag using the optional `description` for this release.
    Use the provided `token` as a GitHub token for API calls authentication.
    Upload all files found in the `directory` tree to that GitHub release.
    Retry API calls up to `retry_limit` times to work around instability of the
    GitHub API.
    """

    api = grr.GithubApi(
        github_api_url='https://api.github.com',
        user=user,
        repo=repo,
        token=token,
        retry_limit=retry_limit,
    )
    release = grr.Release(tag_name=tag_name, body=description)
    files = [Path(r) for r in resource_iter(directory, with_dirs=False)]
    grr.make_release(api, release, files)
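A hedged sketch of driving the helper above; the user, repo, tag and directory values are placeholders and the token is assumed to come from the environment:

import os

# Hypothetical invocation: upload every file under dist/ to a GitHub release.
create_or_update_release_and_upload_directory(
    user='some-user',
    repo='some-repo',
    tag_name='v1.0.0',
    token=os.environ.get('GITHUB_TOKEN'),
    directory='dist',
    description='automated build artifacts',
)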
Example #5
def generate_req_text(find_links, req_file, package_name=None, upgrade=False):
    """
    Generate a requirements file `req_file` listing all the dependency wheels and
    sdists present at `find_links`. If a `package_name` is provided, it is upgraded
    to its latest version; if the `upgrade` option is set, all the wheels are
    upgraded to their latest versions.
    """
    thirdparty = resource_iter(find_links, with_dirs=False)
    dependencies = [
        files for files in thirdparty
        if fnmatchcase(files, '*py3*') or fnmatchcase(files, py_abi) or (
            fnmatchcase(files, '*tar.gz*')
            and not fnmatchcase(files, '*py2-ipaddress-3.4.1.tar.gz*'))
    ]
    with tempfile.TemporaryDirectory() as temp_dir:
        for deps in dependencies:
            copy(deps, temp_dir)
        pip_args = [
            'pip-compile',
            '--generate-hashes',
            '--find-links',
            temp_dir,
            '--output-file',
            req_file,
            '--allow-unsafe',
            '--pip-args',
            '--no-index',
        ]
        if upgrade:
            pip_args.append('--upgrade')
        if package_name:
            pip_args.extend(['--upgrade-package', package_name])
        run(pip_args)
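The function shells out to `pip-compile`, so pip-tools must be installed. A hedged call sketch; the directory, file and package names are placeholders:

# Hypothetical call: regenerate requirements.txt from the wheels and sdists
# found in thirdparty/, upgrading only the named package.
generate_req_text(
    find_links='thirdparty',
    req_file='requirements.txt',
    package_name='click',
    upgrade=False,
)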
Example #6
def resource_paths(base_path, user_ignores):
    """
    Yield tuples of (absolute path, base_path-relative path) for all the files found
    at base_path (either a directory or file) given an absolute base_path. Only yield
    Files, not directories.
    absolute path is a native OS path.
    base_path-relative path is a POSIX path.

    The relative path is guaranteed to be unicode and may be URL-encoded and may not
    be suitable to address an actual file.
    """
    base_path = os.path.abspath(os.path.normpath(
        os.path.expanduser(base_path)))
    base_is_dir = filetype.is_dir(base_path)
    len_base_path = len(base_path)
    ignores = dict()
    ignores.update(user_ignores)
    ignores.update(ignore.ignores_VCS)
    ignored = partial(ignore.is_ignored, ignores=ignores, unignores={})
    resources = fileutils.resource_iter(base_path, ignored=ignored)

    for abs_path in resources:
        posix_path = fileutils.as_posixpath(abs_path)
        # fix paths: keep the path as relative to the original base_path
        rel_path = get_relative_path(posix_path, len_base_path, base_is_dir)
        yield abs_path, rel_path
Example #7
def test_extractcode_command_can_extract_archive_with_unicode_names(
        monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('unicodearch', copy=True)
    if on_linux:
        test_dir = fsencode(test_dir)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, [test_dir])
    assert result.exit_code == 0

    uni_arch = b'unicodepath.tgz' if on_linux and py2 else 'unicodepath.tgz'
    uni_path = b'/unicodepath/' if on_linux and py2 else '/unicodepath/'

    file_result = [
        f for f in map(as_posixpath, resource_iter(test_dir, with_dirs=False))
        if not f.endswith(uni_arch)
    ]
    file_result = [
        EMPTY_STRING.join(f.partition(uni_path)[1:]) for f in file_result
    ]
    file_result = [f for f in file_result if f]
    expected = [
        '/unicodepath/Ho_', '/unicodepath/Ho_a',
        '/unicodepath/koristenjem_Karkkainen_-_Sander.pdf'
    ]
    assert sorted(expected) == sorted(file_result)
Example #8
    def test_fileutils_resource_iter_can_walk_unicode_path_with_zip(self):
        test_dir = self.extract_test_zip('fileutils/walk/unicode.zip')
        test_dir = join(test_dir, 'unicode')

        if on_linux:
            EMPTY_STRING = ''
        else:
            test_dir = unicode(test_dir)
            EMPTY_STRING = u''

        result = sorted([p.replace(test_dir, EMPTY_STRING) for p in fileutils.resource_iter(test_dir)])
        if on_linux:
            expected = [
                '/2.csv',
                '/a',
                '/a/gru\xcc\x88n.png'
            ]
        elif on_mac:
            expected = [
                u'/2.csv',
                u'/a',
                u'/a/gru\u0308n.png'
            ]
        elif on_windows:
            expected = [
                u'\\2.csv',
                u'\\a',
                u'\\a\\gru\u0308n.png'
            ]
        assert expected == result
Example #9
    def check_extract(self, test_function, test_file, expected, expected_warnings=None, check_all=False):
        """
        Run the extraction `test_function` on `test_file` checking that a map of
        expected paths --> size exists in the extracted target directory.
        Does not test the presence of all files unless `check_all` is True.
        """
        from extractcode import archive

        test_file = self.get_test_loc(test_file)
        test_dir = self.get_temp_dir()
        warnings = test_function(test_file, test_dir)
        if expected_warnings is not None:
            assert expected_warnings == warnings

        if check_all:
            len_test_dir = len(test_dir)
            extracted = {path[len_test_dir:]: filetype.get_size(path) for path in fileutils.resource_iter(test_dir, with_dirs=False)}
            expected = {os.path.join(test_dir, exp_path): exp_size for exp_path, exp_size in expected.items()}
            assert sorted(expected.items()) == sorted(extracted.items())
        else:
            for exp_path, exp_size in expected.items():
                exp_loc = os.path.join(test_dir, exp_path)
                msg = '''When extracting: %(test_file)s
                    With function: %(test_function)r
                    Failed to find expected path: %(exp_loc)s'''
                assert os.path.exists(exp_loc), msg % locals()
                if exp_size is not None:
                    res_size = os.stat(exp_loc).st_size
                    msg = '''When extracting: %(test_file)s
                        With function: %(test_function)r
                        Failed to assert the correct size %(exp_size)d
                        Got instead: %(res_size)d
                        for expected path: %(exp_loc)s'''
                    assert exp_size == res_size, msg % locals()
Example #10
def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert not 'WARNING' in result.output
        assert not 'ERROR' in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.resource_iter(test_tgt_dir, with_dirs=False)]
        assert sorted(expected) == sorted(file_result)
    finally:
        fileutils.delete(test_src_dir)
Example #11
def test_extractcode_command_can_ignore(monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('extract_ignore', copy=True)
    if on_linux:
        test_dir = fsencode(test_dir)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode,
                           ['--ignore', '*.tar', test_dir])
    assert result.exit_code == 0

    file_result = [
        f for f in map(as_posixpath, resource_iter(test_dir, with_dirs=False))
        if not f.endswith('a.tar') and not f.endswith('b.tar')
    ]
    file_result = [
        EMPTY_STRING.join(f.partition('/a.zip-extract/')[1:])
        for f in file_result
    ]
    file_result = [f for f in file_result if f]
    expected = [
        '/a.zip-extract/a.txt',
        '/a.zip-extract/b.zip',
        '/a.zip-extract/b.zip-extract/b.txt',
        '/a.zip-extract/c.tar',
    ]
    assert sorted(expected) == sorted(file_result)
Example #12
    def test_is_pom_m2(self):
        test_dir = self.get_test_loc('m2')
        for test_file in fileutils.resource_iter(test_dir, with_dirs=False):
            if test_file.endswith('.json'):
                continue

            loc = os.path.join(test_dir, test_file)
            assert maven.MavenPomXmlHandler.is_datafile(loc), 'file://' + loc + ' should be a POM'
Example #13
 def test_resource_iter_can_iterate_a_single_file_with_dirs(self):
     test_file = self.get_test_loc('fileutils/walk/f')
     result = [
         as_posixpath(f)
         for f in fileutils.resource_iter(test_file, with_dirs=True)
     ]
     expected = [as_posixpath(test_file)]
     assert expected == result
Example #14
    def test_is_pom_maven2(self):
        test_dir = self.get_test_loc('maven2')
        for test_file in fileutils.resource_iter(test_dir, with_dirs=False):
            if test_file.endswith('.json'):
                continue

            loc = os.path.join(test_dir, test_file)
            assert maven.is_pom(loc), loc + ' should be a POM'
Example #15
    def test_resource_iter_can_walk_non_utf8_path_from_unicode_path(self):
        test_dir = self.extract_test_tar_raw('fileutils/walk_non_utf8/non_unicode.tgz')
        test_dir = join(test_dir, 'non_unicode')

        if not on_linux:
            test_dir = unicode(test_dir)
        result = list(fileutils.resource_iter(test_dir))
        assert 18 == len(result)
Example #16
    def test_distro_from_os_release_file(self):
        test_dir = self.get_test_loc('distro/os-release')

        for test_file in resource_iter(test_dir, with_dirs=False):
            if test_file.endswith('-expected.json'):
                continue
            expected = test_file + '-distro-expected.json'
            result = Distro.from_os_release_file(test_file).to_dict()
            check_expected(result, expected, regen=False)
Example #17
 def test_image_squash_simple(self):
     test_dir = self.extract_test_tar('rootfs/hello-world.tar')
     img = image.Image.get_images_from_dir(test_dir)[0]
     target_dir = self.get_temp_dir()
     img.squash(target_dir)
     results = sorted([p.replace(target_dir, '')
         for p in fileutils.resource_iter(target_dir)])
     expected = ['/hello']
     assert expected == results
Example #18
def load_rules(rules_data_dir=rules_data_dir):
    """
    Return an iterable of rules loaded from rule files.
    """
    # TODO: OPTIMIZE: create a graph of rules to account for containment and
    # similarity clusters?
    seen_files = set()
    processed_files = set()
    lower_case_files = set()
    case_problems = set()
    model_errors = []
    for data_file in resource_iter(rules_data_dir, with_dirs=False):
        if data_file.endswith('.yml'):
            base_name = file_base_name(data_file)
            rule_file = join(rules_data_dir, base_name + '.RULE')
            try:
                rule = Rule(data_file=data_file, text_file=rule_file)
                yield rule
            except Exception as re:
                model_errors.append(str(re))
            # accumulate sets to ensure we do not have illegal names or extra
            # orphaned files
            data_lower = data_file.lower()
            if data_lower in lower_case_files:
                case_problems.add(data_lower)
            else:
                lower_case_files.add(data_lower)

            rule_lower = rule_file.lower()
            if rule_lower in lower_case_files:
                case_problems.add(rule_lower)
            else:
                lower_case_files.add(rule_lower)

            processed_files.update([data_file, rule_file])

        if not data_file.endswith('~'):
            seen_files.add(data_file)

    if model_errors:
        errors = '\n'.join(model_errors)
        msg = 'Invalid rule files in rule directory: %(rules_data_dir)r\n%(errors)s'
        raise Exception(msg % locals())

    unknown_files = seen_files - processed_files
    if unknown_files or case_problems:
        msg = ''
        if unknown_files:
            files = '\n'.join(sorted('file://' + f for f in unknown_files))
            msg += 'Orphaned files in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        if case_problems:
            files = '\n'.join(sorted('file://' + f for f in case_problems))
            msg += '\nRule files with non-unique name ignoring case in rule directory: %(rules_data_dir)r\n%(files)s' % locals()

        raise Exception(msg)
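Note that `load_rules` is a generator: rules are yielded as files are read, and the orphan and case checks only raise once iteration is exhausted. A minimal consumption sketch, assuming the module-level `rules_data_dir` default:

# Hypothetical usage: force full iteration so validation errors are raised.
rules = list(load_rules())
print(len(rules), 'rules loaded')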
Example #19
 def test_resource_iter_with_dir_only(self):
     test_dir = self.get_test_loc('fileutils/walk')
     base = self.get_test_loc('fileutils')
     result = sorted([as_posixpath(f.replace(base, ''))
                      for f in fileutils.resource_iter(test_dir, with_files=False, with_dirs=True)])
     expected = [
          '/walk/d1',
          '/walk/d1/d2',
          '/walk/d1/d2/d3',
     ]
     assert sorted(expected) == sorted(result)
Example #20
 def test_resource_iter_with_dirs(self):
     test_dir = self.get_test_loc('fileutils/walk')
     base = self.get_test_loc('fileutils')
     result = sorted([
         as_posixpath(f.replace(base, ''))
         for f in fileutils.resource_iter(test_dir, with_dirs=True)
     ])
     expected = [
         '/walk/d1', '/walk/d1/d2', '/walk/d1/d2/d3', '/walk/d1/d2/d3/f3',
         '/walk/d1/d2/f2', '/walk/d1/f1', '/walk/f', '/walk/unicode.zip'
     ]
     assert sorted(expected) == sorted(result)
Example #21
def tree_checksum(tree_base_dir=scancode_src_dir, _ignored=_ignored_from_hash):
    """
    Return a checksum computed from a file tree using the file paths, size and
    last modified time stamps. The purpose is to detect if there has been any
    modification to source code or data files and use this as a proxy to verify
    the cache consistency.

    NOTE: this is not 100% foolproof but good enough in practice.
    """
    resources = resource_iter(tree_base_dir, ignored=_ignored, with_dirs=False)
    hashable = (pth + str(getmtime(pth)) + str(getsize(pth)) for pth in resources)
    return md5(''.join(sorted(hashable))).hexdigest()
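As written this is Python 2 style: on Python 3, `md5()` requires bytes. A hedged Python 3 sketch of the same recipe (paths, mtimes and sizes hashed together); the no-op `ignored` default is an assumption about the callable `resource_iter` expects:

import hashlib
from os.path import getmtime, getsize

from commoncode.fileutils import resource_iter

def tree_checksum_py3(tree_base_dir, ignored=lambda _path: False):
    # Same recipe: hash the sorted concatenation of path, mtime and size.
    resources = resource_iter(tree_base_dir, ignored=ignored, with_dirs=False)
    hashable = (pth + str(getmtime(pth)) + str(getsize(pth)) for pth in resources)
    return hashlib.md5(''.join(sorted(hashable)).encode('utf-8')).hexdigest()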
Example #22
def cli(test_directory):
    """
    Generate license tests YAML data files for each file in a directory tree.
    The expected license is computed from license detection.
    """
    from licensedcode_test_utils import LicenseTest  # NOQA

    print()
    for test_file in fileutils.resource_iter(test_directory, with_dirs=False):
        lt = build_test(test_file)
        lt.dump()
        print("--> License Test added:", lt)
        print()
Example #23
    def test_squash_single_layer(self):
        test_dir = self.extract_test_tar('cli/hello-world.tar')
        target_dir = self.get_temp_dir()

        cli._container_inspector_squash(image_path=test_dir,
                                        extract_directory=target_dir)

        results = sorted([
            p.replace(target_dir, '')
            for p in fileutils.resource_iter(target_dir)
        ])
        expected = ['/hello']
        assert expected == results
Example #24
 def test_resource_iter_with_files_no_dir(self):
     test_dir = self.get_test_loc('fileutils/walk')
     base = self.get_test_loc('fileutils')
     result = sorted([as_posixpath(f.replace(base, ''))
                      for f in fileutils.resource_iter(test_dir, with_files=True, with_dirs=False)])
     expected = [
         '/walk/f',
         '/walk/unicode.zip',
         '/walk/d1/f1',
         '/walk/d1/d2/f2',
         '/walk/d1/d2/d3/f3'
     ]
     assert sorted(expected) == sorted(result)
Example #25
 def test_resource_iter_return_unicode_on_unicode_input(self):
     test_dir = self.get_test_loc('fileutils/walk')
     base = unicode(self.get_test_loc('fileutils'))
     result = sorted([
         as_posixpath(f.replace(base, ''))
         for f in fileutils.resource_iter(test_dir, with_dirs=True)
     ])
     expected = [
         u'/walk/d1', u'/walk/d1/d2', u'/walk/d1/d2/d3',
         u'/walk/d1/d2/d3/f3', u'/walk/d1/d2/f2', u'/walk/d1/f1',
         u'/walk/f', u'/walk/unicode.zip'
     ]
     assert sorted(expected) == sorted(result)
     assert all(isinstance(p, unicode) for p in result)
Example #26
def test_extractcode_command_can_extract_shallow(monkeypatch):
    test_dir = test_env.get_test_loc('extract_shallow', copy=True)
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    runner = CliRunner()
    result = runner.invoke(extract_cli.extractcode, ['--shallow', test_dir])
    assert result.exit_code == 0
    file_result = [f for f in map(as_posixpath, resource_iter(test_dir, with_dirs=False)) if not f.endswith('unicodepath.tgz')]
    file_result = [''.join(f.partition('/top.zip-extract/')[1:]) for f in file_result]
    file_result = [f for f in file_result if f]
    # this checks that the zips inside top.zip are not extracted
    expected = [
        '/top.zip-extract/some3.zip',
        '/top.zip-extract/some2.zip',
        '/top.zip-extract/some1.zip',
    ]
    assert sorted(expected) == sorted(file_result)
Example #27
def test_scan_works_with_multiple_processes_and_timeouts():
    # this contains test files with a lot of copyrights that should
    # take more than the timeout to scan
    test_dir = test_env.get_test_loc('timeout', copy=True)
    # add some random bytes to the test files to ensure that the license results will
    # not be cached
    import time, random
    for tf in fileutils.resource_iter(test_dir, with_dirs=False):
        with open(tf, 'ab') as tfh:
            tfh.write('(c)' + str(time.time()) +
                      repr([random.randint(0, 10**6)
                            for _ in range(10000)]) + '(c)')

    result_file = test_env.get_temp_file('json')

    args = [
        '--copyright', '--processes', '2', '--timeout', '0.000001',
        '--strip-root', test_dir, '--json', result_file
    ]
    run_scan_click(args, expected_rc=1)

    expected = [
        [(u'path', u'test1.txt'), (u'type', u'file'), (u'authors', []),
         (u'copyrights', []), (u'holders', []),
         (u'scan_errors',
          [
              u'ERROR: for scanner: copyrights:\nERROR: Processing interrupted: timeout after 0 seconds.'
          ])],
        [(u'path', u'test2.txt'), (u'type', u'file'), (u'authors', []),
         (u'copyrights',
          []),
         (u'holders',
          []),
         (u'scan_errors', [
             u'ERROR: for scanner: copyrights:\nERROR: Processing interrupted: timeout after 0 seconds.'
         ])],
        [(u'path', u'test3.txt'), (u'type', u'file'), (u'authors', []),
         (u'copyrights', []), (u'holders', []),
         (u'scan_errors', [
             u'ERROR: for scanner: copyrights:\nERROR: Processing interrupted: timeout after 0 seconds.'
         ])]
    ]

    result_json = json.loads(open(result_file).read(),
                             object_pairs_hook=OrderedDict)
    assert sorted(sorted(x) for x in expected) == sorted(
        sorted(x.items()) for x in result_json['files'])
Example #28
 def test_rebuild_rootfs_with_delete(self):
     test_dir = self.extract_test_tar('rootfs/she-image_from_scratch-1.0.tar')
     img = image.Image.get_images_from_dir(test_dir)[0]
     target_dir = self.get_temp_dir()
     rebuild_rootfs(img, target_dir)
     results = sorted([p.replace(target_dir, '')
         for p in fileutils.resource_iter(target_dir)])
     expected = [
         '/additions',
         '/additions/bar',
         '/additions/baz',
         '/additions/baz/this',
         '/additions/foo',
         '/additions/hello',
         '/hello',
     ]
     assert expected == results
Example #29
 def test_resource_iter_return_unicode_on_unicode_input(self):
     test_dir = self.get_test_loc('fileutils/walk')
     base = unicode(self.get_test_loc('fileutils'))
     result = sorted([as_posixpath(f.replace(base, ''))
                      for f in fileutils.resource_iter(test_dir, with_files=True, with_dirs=True)])
     expected = [
         u'/walk/d1',
         u'/walk/d1/d2',
         u'/walk/d1/d2/d3',
         u'/walk/d1/d2/d3/f3',
         u'/walk/d1/d2/f2',
         u'/walk/d1/f1',
         u'/walk/f',
         u'/walk/unicode.zip'
     ]
     assert sorted(expected) == sorted(result)
     assert all(isinstance(p, unicode) for p in result)
Example #30
def search_package(package_name, target, version=None):
    """
    Search `package_name` (with an optional `version`) in the `target` directory.
    Print results on screen.
    """

    if version:
        package_name = '*{}-{}*'.format(package_name, version)
    else:
        package_name = '*{}*'.format(package_name)
    thirdparty = resource_iter(target, with_dirs=False)
    dependency = [
        files for files in thirdparty
        if fnmatch.fnmatchcase(files, package_name)
    ]
    if dependency:
        whl = [files for files in dependency if files.endswith('.whl')]
        # There may be multiple versions of the same package, so a list of wheels is collected.
        sdist = [files for files in dependency if files.endswith('.tar.gz')]
        about = [files for files in dependency if files.endswith('.ABOUT')]
        notice = [files for files in dependency if files.endswith('.NOTICE')]
        license = [files for files in dependency if files.endswith('.LICENSE')]
        print('\nSearched package wheels are:')
        print(*whl, sep='\n')
        if sdist:
            print('\nCorresponding sdists are:')
            print(*sdist, sep='\n')
        else:
            print('\nCorresponding sdist does not exist in target')
        if about:
            print('\nCorresponding .ABOUT files are:')
            print(*about, sep='\n')
        else:
            print('\nCorresponding .ABOUT file does not exist in target\n')
        if license:
            print('\nCorresponding .LICENSE files are:')
            print(*license, sep='\n')
        else:
            print('\nCorresponding .LICENSE file does not exist in target')
        if notice:
            print('\nCorresponding .NOTICE files are:')
            print(*notice, sep='\n')
        else:
            print('\nCorresponding .NOTICE file does not exist in target\n')
    else:
        print('\nSpecified package does not exist\n')
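A hedged call sketch for the search helper above; the package name, version and target directory are placeholders:

# Hypothetical calls: look for any wheel/sdist of "requests" in thirdparty/,
# optionally pinned to a specific version.
search_package('requests', target='thirdparty')
search_package('requests', target='thirdparty', version='2.26.0')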
Example #31
def extract_twice(location, target_dir, extractor1, extractor2):
    """
    Extract a nested compressed archive at `location` to `target_dir` using
    the `extractor1` function to a temporary directory then the `extractor2`
    function on the extracted payload of `extractor1`.

    Return a list of warning messages. Raise exceptions on errors.

    Typical nested archives include compressed tarballs and RPMs (containing a
    compressed cpio).

    Note: it would be easy to support deeper extractor chains, but this gets
    hard to trace and debug very quickly. A depth of two is simple and sane and
    covers most common cases.
    """
    if on_linux and py2:
        location = fileutils.fsencode(location)
        target_dir = fileutils.fsencode(target_dir)
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = compat.unicode(
        os.path.abspath(os.path.expanduser(target_dir)))
    # extract first the intermediate payload to a temp dir
    temp_target = compat.unicode(
        fileutils.get_temp_dir(prefix='extractcode-extract-'))
    warnings = extractor1(abs_location, temp_target)
    if TRACE:
        logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

    # extract this intermediate payload to the final target_dir
    try:
        inner_archives = list(
            fileutils.resource_iter(temp_target, with_dirs=False))
        if not inner_archives:
            warnings.append(location + ': No files found in archive.')
        else:
            for extracted1_loc in inner_archives:
                if TRACE:
                    logger.debug(
                        'extract_twice: extractor2: %(extracted1_loc)r' %
                        locals())
                warnings.extend(extractor2(extracted1_loc, abs_target_dir))
    finally:
        # cleanup the temporary output from extractor1
        fileutils.delete(temp_target)
    return warnings
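A hedged sketch of wiring `extract_twice` together; the two extractor stubs only illustrate the contract the docstring describes (each takes a location and a target directory and returns a list of warning strings) and the archive path is a placeholder:

def extract_outer(location, target_dir):
    # Placeholder: would decompress the outer archive into target_dir.
    return []

def extract_inner(location, target_dir):
    # Placeholder: would unpack the inner payload into target_dir.
    return []

warnings = extract_twice(
    location='package.rpm',
    target_dir='extracted',
    extractor1=extract_outer,
    extractor2=extract_inner,
)
print(warnings)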
Example #32
    def test_squash_multiple_layers(self):
        test_dir = self.extract_test_tar('cli/she-image_from_scratch-1.0.tar')
        target_dir = self.get_temp_dir()

        cli._container_inspector_squash(image_path=test_dir,
                                        extract_directory=target_dir)

        results = sorted([
            p.replace(target_dir, '')
            for p in fileutils.resource_iter(target_dir)
        ])
        expected = [
            '/additions',
            '/additions/bar',
            '/additions/baz',
            '/additions/baz/this',
            '/additions/foo',
            '/additions/hello',
            '/hello',
        ]
        assert expected == results
Example #33
def resource_paths(base_path, diag, scans_cache_class, pre_scan_plugins=None):
    """
    Yield `Resource` objects for all the files found at base_path
    (either a directory or file) given an absolute base_path. Only yield
    Files, not directories.
    absolute path is a native OS path.
    base_path-relative path is a POSIX path.

    The relative path is guaranteed to be unicode and may be URL-encoded and may not
    be suitable to address an actual file.
    """
    if base_path:
        if on_linux:
            base_path = path_to_bytes(base_path)
        else:
            base_path = path_to_unicode(base_path)

    base_path = os.path.abspath(os.path.normpath(
        os.path.expanduser(base_path)))
    base_is_dir = filetype.is_dir(base_path)
    len_base_path = len(base_path)
    ignores = {}
    if pre_scan_plugins:
        for plugin in pre_scan_plugins:
            ignores.update(plugin.get_ignores())
    ignores.update(ignore.ignores_VCS)

    ignorer = build_ignorer(ignores, unignores={})
    resources = fileutils.resource_iter(base_path, ignored=ignorer)

    for abs_path in resources:
        resource = Resource(scans_cache_class, abs_path, base_is_dir,
                            len_base_path)
        # always fetch infos and cache.
        resource.put_info(scan_infos(abs_path, diag=diag))
        if pre_scan_plugins:
            for plugin in pre_scan_plugins:
                resource = plugin.process_resource(resource)
        if resource:
            yield resource
Example #34
 def test_resource_iter_can_walk_an_empty_dir(self):
     test_dir = self.get_temp_dir()
     result = list(fileutils.resource_iter(test_dir))
     expected = []
     assert expected == result
Example #35
 def test_resource_iter_can_iterate_a_single_file(self):
     test_file = self.get_test_loc('fileutils/walk/f')
     result = [as_posixpath(f) for f in fileutils.resource_iter(test_file)]
     expected = [as_posixpath(test_file)]
     assert expected == result
Example #36
def scan(input_path, copyright=True, license=True, package=True,  # @ReservedAssignment
         email=False, url=False, info=True, verbose=False, quiet=False):  # @ReservedAssignment
    """
    Do the scans proper, return results.
    """
    # save paths to report paths relative to the original input
    original_input = fileutils.as_posixpath(input_path)
    abs_input = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(input_path)))

    # note: "flag and function" expressions return the function if flag is True
    scanners = {
        'copyrights': copyright and get_copyrights,
        'licenses': license and get_licenses,
        'packages': package and get_package_infos,
        'emails': email and get_emails,
        'urls': url and get_urls,
        'infos': info and get_file_infos,
    }

    results = []

    # note: we inline progress display functions to close on some args

    def scan_start():
        """Progress event displayed at start of scan"""
        return style('Scanning files...', fg='green')

    def scan_event(item):
        """Progress event displayed each time a file is scanned"""
        if item:
            line = verbose and item or fileutils.file_name(item) or ''
            return 'Scanning: %(line)s' % locals()

    def scan_end():
        """Progress event displayed at end of scan"""
        has_warnings = False
        has_errors = False
        summary = []
        summary_color = 'green'
        summary_color = has_warnings and 'yellow' or summary_color
        summary_color = has_errors and 'red' or summary_color
        summary.append(style('Scanning done.', fg=summary_color, reset=True))
        return '\n'.join(summary)

    ignored = partial(ignore.is_ignored, ignores=ignore.ignores_VCS, unignores={})
    resources = fileutils.resource_iter(abs_input, ignored=ignored)

    with utils.progressmanager(resources,
                               item_show_func=scan_event,
                               start_show_func=scan_start,
                               finish_show_func=scan_end,
                               verbose=verbose,
                               show_pos=True,
                               quiet=quiet
                               ) as progressive_resources:

        for resource in progressive_resources:
            res = fileutils.as_posixpath(resource)

            # fix paths: keep the location as relative to the original input
            relative_path = utils.get_relative_path(original_input, abs_input, res)
            scan_result = OrderedDict(location=relative_path)
            # Should we yield instead?
            scan_result.update(scan_one(res, scanners))
            results.append(scan_result)

    # TODO: eventually merge scans for the same resource location...
    # TODO: fix absolute paths as relative to original input argument...

    return results
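A hedged call sketch; the flag defaults mirror the signature above and the input path is a placeholder:

# Hypothetical invocation: scan a tree for copyrights, licenses and packages,
# also collecting emails and URLs, without progress output.
results = scan('samples/', email=True, url=True, quiet=True)
print(len(results), 'files scanned')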