def test_get_bin_lib_dirs(self):
    """
    Check that command.get_bin_lib_dirs returns the expected bin and lib
    directories (and directory contents) for each OS/arch test layout.
    """
    # Fixed: removed an unused inner helper `norm` that was never called,
    # and dropped redundant `== True` comparisons on `all(...)` results.
    root_dir = self.get_test_loc('command/bin', copy=True)
    for os_arch, paths in self.os_arches_files_test_matrix.items():
        base_dir = os.path.join(root_dir, os_arch)
        bin_dir, lib_dir = command.get_bin_lib_dirs(base_dir)
        expected_bin, expected_lib, expected_bin_files, expected_lib_files = paths
        if expected_bin:
            assert os.path.exists(bin_dir)
            assert os.path.isdir(bin_dir)
            pbd = fileutils.as_posixpath(bin_dir)
            assert pbd.endswith(expected_bin.replace('command/', ''))
            if expected_bin_files:
                assert all(f in expected_bin_files for f in os.listdir(bin_dir))
        else:
            assert expected_bin == bin_dir
        if expected_lib:
            assert os.path.exists(lib_dir)
            assert os.path.isdir(lib_dir)
            pld = fileutils.as_posixpath(lib_dir)
            assert pld.endswith(expected_lib.replace('command/', ''))
            if expected_lib_files:
                assert all(f in expected_lib_files for f in os.listdir(lib_dir))
        else:
            assert expected_lib == lib_dir
def check_files(test_dir, expected):
    """
    Walk test_dir. Check that all dirs are readable. Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)
    base_posix = fileutils.as_posixpath(test_dir)

    # collect every file location under the tree
    locations = [
        os.path.join(top, name)
        for top, _, file_names in os.walk(test_dir)
        for name in file_names
    ]
    # base-relative posix paths for comparison with `expected`
    rel_paths = [
        fileutils.as_posixpath(loc).replace(base_posix, '').strip('/')
        for loc in locations
    ]
    assert sorted(expected) == sorted(rel_paths)

    for loc in locations:
        assert filetype.is_file(loc)
        assert not filetype.is_special(loc)
        assert filetype.is_readable(loc)
def test_parent_directory_on_path_and_location_10(self):
    """
    parent_directory of a bare file name is '/'; for a joined location the
    result ends with a separator.
    """
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'tst'
    expected_name = '/'
    # bare relative file name: the parent is the root
    parent = fileutils.as_posixpath(fileutils.parent_directory(test_file))
    assert expected_name == parent
    # full location: the parent path ends with a separator
    location = os.path.join(test_dir, test_file)
    parent = fileutils.as_posixpath(fileutils.parent_directory(location))
    assert parent.endswith(expected_name)
def remove_backslashes_and_dotdots(directory):
    """
    Walk a directory and rename the files if their names contain backslashes.
    Return a list of errors if any.
    """
    errors = []
    for top, _, files in os.walk(str(directory)):
        for filename in files:
            # only touch names with backslashes or dotdot segments
            if '\\' not in filename and '..' not in filename:
                continue
            try:
                cleaned = fileutils.as_posixpath(filename).strip('/')
                cleaned = posixpath.normpath(cleaned)
                cleaned = cleaned.replace('..', '/').strip('/')
                cleaned = posixpath.normpath(cleaned)
                parts = cleaned.split('/')
                # create the intermediate directories, then move the file
                parent_dir = os.path.join(top, *parts[:-1])
                fileutils.create_dir(parent_dir)
                shutil.move(os.path.join(top, filename), os.path.join(top, *parts))
            except Exception:
                errors.append(os.path.join(top, filename))
    return errors
def remove_backslashes_and_dotdots(directory):
    """
    Walk a directory and rename the files if their names contain backslashes.
    Return a list of errors if any.
    """
    if on_linux:
        directory = path_to_bytes(directory)
    errors = []
    for top, _, files in os.walk(directory):
        for filename in files:
            # only touch names with backslashes or dotdot segments
            if WIN_PATH_SEP not in filename and DOTDOT not in filename:
                continue
            try:
                cleaned = fileutils.as_posixpath(filename).strip(POSIX_PATH_SEP)
                cleaned = posixpath.normpath(cleaned)
                cleaned = cleaned.replace(DOTDOT, POSIX_PATH_SEP).strip(POSIX_PATH_SEP)
                cleaned = posixpath.normpath(cleaned)
                parts = cleaned.split(POSIX_PATH_SEP)
                # create the intermediate directories, then move the file
                parent_dir = os.path.join(top, *parts[:-1])
                fileutils.create_dir(parent_dir)
                shutil.move(os.path.join(top, filename), os.path.join(top, *parts))
            except Exception:
                errors.append(os.path.join(top, filename))
    return errors
def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
    """
    Return True if the file at ``location`` is likely a package data file
    that this parser can handle. This implementation is based on:

    - matching the ``location`` as a whole with any one of the
      ``path_patterns`` sequence of patterns defined as a class attributes.
      The path patterns are for POSIX paths.

    - if defined, ensuring that the filetype of the file at ``location``
      contains any of the type listed in the ``filetypes`` class attribute.

    - ``_bare_filename`` is for testing using a bare path that does not
      point to real files.

    Subclasses can override to implement more complex data file recognition.

    Note: implicitly returns None (falsy) when nothing matches.
    """
    if filetype.is_file(location) or _bare_filename:
        loc = as_posixpath(location)
        # case-sensitive glob match against the class path patterns
        if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):
            filetypes = filetypes or cls.filetypes
            if not filetypes:
                # path pattern match alone is enough
                return True
            else:
                # NOTE(review): with _bare_filename=True this still probes the
                # real file via contenttype — presumably callers pass real
                # files when filetypes are set; confirm.
                T = contenttype.get_type(location)
                actual_type = T.filetype_file.lower()
                return any(ft in actual_type for ft in filetypes)
def test_extract_option_works_with_relative_paths(self):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    # create a tmp dir inside the scancode root so the test path is relative
    scancode_root = dirname(dirname(dirname(__file__)))
    scancode_tmp = join(scancode_root, 'tmp')
    fileutils.create_dir(scancode_tmp)
    scancode_root_abs = abspath(scancode_root)
    # strip the absolute prefix to get a scancode-root-relative dir
    test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
    test_file = self.get_test_loc('extract_relative_path/basic.zip')
    shutil.copy(test_file, test_src_dir)
    test_src_file = join(test_src_dir, 'basic.zip')
    # the extract target is created next to the archive with the extract suffix
    test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
    runner = CliRunner()
    result = runner.invoke(cli.scancode, ['--extract', test_src_file])
    assert result.exit_code == 0
    assert 'Extracting done' in result.output
    assert not 'WARNING' in result.output
    assert not 'ERROR' in result.output
    # check the extracted file tree, as target-relative posix paths
    expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
    file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
    assert sorted(expected) == sorted(file_result)
def resource_paths(base_path, user_ignores):
    """
    Yield tuples of (absolute path, base_path-relative path) for all the
    files found at base_path (either a directory or file) given an absolute
    base_path. Only yield Files, not directories.
    absolute path is a native OS path.
    base_path-relative path is a POSIX path.

    The relative path is guaranted to be unicode and may be URL-encoded and
    may not be suitable to address an actual file.
    """
    base_path = os.path.abspath(os.path.normpath(os.path.expanduser(base_path)))
    base_is_dir = filetype.is_dir(base_path)
    len_base_path = len(base_path)

    # merge the user-provided ignores with the default VCS ignores
    combined_ignores = {}
    combined_ignores.update(user_ignores)
    combined_ignores.update(ignore.ignores_VCS)
    is_ignored = partial(ignore.is_ignored, ignores=combined_ignores, unignores={})

    for abs_path in fileutils.resource_iter(base_path, ignored=is_ignored):
        posix_path = fileutils.as_posixpath(abs_path)
        # keep the path as relative to the original base_path
        rel_path = get_relative_path(posix_path, len_base_path, base_is_dir)
        yield abs_path, rel_path
def _match(path, patterns):
    """
    Return a message if `path` is matched by a pattern from the `patterns`
    map or False.
    """
    if not path or not patterns:
        return False
    path = fileutils.as_posixpath(path).lower()
    pathstripped = path.lstrip(POSIX_PATH_SEP)
    if not pathstripped:
        return False
    segments = paths.split(pathstripped)
    if DEBUG:
        logger.debug('_match: path: %(path)r patterns:%(patterns)r.' % locals())
    mtch = False
    for pat, msg in patterns.items():
        # Fixed: was `not pat and not pat.strip()`, which only skipped truly
        # empty patterns and let blank-only patterns fall through to matching.
        if not pat or not pat.strip():
            continue
        msg = msg or EMPTY_STRING
        pat = pat.lstrip(POSIX_PATH_SEP).lower()
        is_plain = POSIX_PATH_SEP not in pat
        if is_plain:
            # plain patterns match any single path segment
            if any(fnmatch.fnmatchcase(s, pat) for s in segments):
                mtch = msg
                break
        elif (fnmatch.fnmatchcase(path, pat)
              or fnmatch.fnmatchcase(pathstripped, pat)):
            # multi-segment patterns match the whole path
            mtch = msg
            break
    if DEBUG:
        logger.debug('_match: match is %(mtch)r' % locals())
    return mtch
def safe_path(path, posix=False): """ Convert `path` to a safe and portable POSIX path usable on multiple OSes. The returned path is an ASCII-only byte string, resolved for relative segments and itself relative. The `path` is treated as a POSIX path if `posix` is True or as a Windows path with blackslash separators otherwise. """ # if the path is UTF, try to use unicode instead if not isinstance(path, unicode): path = as_unicode(path) path = path.strip() if not is_posixpath(path): path = as_winpath(path) posix = False path = resolve(path, posix) _pathmod, path_sep = path_handlers(path, posix) segments = [s.strip() for s in path.split(path_sep) if s.strip()] segments = [portable_filename(s) for s in segments] # print('safe_path: orig:', orig_path, 'segments:', segments) if not segments: return '_' # always return posix sep = u'/' if isinstance(path, unicode) else b'/' path = sep.join(segments) return as_posixpath(path)
def extract_end():
    """
    Display a summary of warnings and errors if any.
    """
    summary = []
    has_warnings = False
    has_errors = False
    for xev in extract_results:
        has_errors = bool(xev.errors) or has_errors
        has_warnings = bool(xev.warnings) or has_warnings
        source = as_posixpath(xev.source)
        source = utils.get_relative_path(original_input, abs_input, source)
        for e in xev.errors:
            summary.append(
                style('ERROR extracting: %(source)s: %(e)r' % locals(),
                      fg='red', reset=False))
        for warn in xev.warnings:
            summary.append(
                style('WARNING extracting: %(source)s: %(warn)r' % locals(),
                      fg='yellow', reset=False))
    # errors trump warnings for the final line color
    if has_errors:
        summary_color = 'red'
    elif has_warnings:
        summary_color = 'yellow'
    else:
        summary_color = 'green'
    summary.append(style('Extracting done.', fg=summary_color, reset=True))
    return '\n'.join(summary)
def resolve(path):
    """
    Resolve and return a path-like string `path` to a posix relative path
    string where extra slashes including leading and trailing slashes, dot
    '.' and dotdot '..' path segments have been removed or normalized or
    resolved with the provided path "tree". When a dotdot path segment cannot
    be further resolved by "escaping" the provided path tree, it is replaced
    by the string 'dotdot'.
    """
    if isinstance(path, unicode):
        slash, dot, dotdot = u'/', u'.', u'dotdot'
    else:
        slash, dot, dotdot = '/', '.', 'dotdot'

    if not path or not path.strip():
        return dot
    path = path.strip()

    path = fileutils.as_posixpath(path).strip(slash)

    # drop empty (// or ///), blank (space only) and single dot segments
    parts = [seg.strip() for seg in path.split(slash)]
    parts = [seg for seg in parts if seg and seg != '.']
    # normpath resolves the .. segments it can
    path = posixpath.normpath(slash.join(parts))

    # any leftover .. would escape the tree: replace with a literal 'dotdot'
    parts = [dotdot if seg == '..' else seg for seg in path.split(slash)]
    return slash.join(parts)
def display_extract_summary():
    """
    Display a summary of warnings and errors if any, then a colored
    'Extracting done.' line: red if errors, yellow if warnings, else green.
    """
    # Fixed: removed an unused `summary = []` accumulator that was never
    # read or appended to.
    has_warnings = False
    has_errors = False
    for xev in extract_results:
        has_errors = has_errors or bool(xev.errors)
        has_warnings = has_warnings or bool(xev.warnings)
        source = fileutils.as_posixpath(xev.source)
        # decode non-unicode sources to displayable text
        if not isinstance(source, unicode):
            source = toascii(source, translit=True).decode('utf-8', 'replace')
        source = utils.get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir)
        for e in xev.errors:
            echo_stderr('ERROR extracting: %(source)s: %(e)s' % locals(), fg='red')
        for warn in xev.warnings:
            echo_stderr('WARNING extracting: %(source)s: %(warn)s' % locals(), fg='yellow')
    summary_color = 'green'
    if has_warnings:
        summary_color = 'yellow'
    if has_errors:
        summary_color = 'red'
    echo_stderr('Extracting done.', fg=summary_color, reset=True)
def extract_end():
    """
    Display a summary of warnings and errors if any.
    """
    summary = []
    has_warnings = False
    has_errors = False
    for xev in extract_results:
        if xev.errors:
            has_errors = True
        if xev.warnings:
            has_warnings = True
        source = as_posixpath(xev.source)
        source = utils.get_relative_path(original_input, abs_input, source)
        for e in xev.errors:
            summary.append(style('ERROR extracting: %(source)s: %(e)r' % locals(), fg='red', reset=False))
        for warn in xev.warnings:
            summary.append(style('WARNING extracting: %(source)s: %(warn)r' % locals(), fg='yellow', reset=False))
    # errors take precedence over warnings for the final line color
    summary_color = has_errors and 'red' or (has_warnings and 'yellow' or 'green')
    summary.append(style('Extracting done.', fg=summary_color, reset=True))
    return '\n'.join(summary)
def test_scan_info_returns_full_root():
    """With --full-root the scan output contains the full posix root path."""
    test_dir = test_env.extract_test_tar('info/basic.tgz')
    result = run_scan_click(['--info', '--full-root', test_dir])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    posix_root = fileutils.as_posixpath(test_dir)
    assert posix_root in result.output
def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, ie the scancode-toolkit/ dir
    # To use relative paths, we use our tmp dir at the root of the code tree
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    try:
        # create a tmp dir inside the scancode root so the test path is relative
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        # strip the absolute prefix to get a scancode-root-relative dir
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        # the extract target is created next to the archive with the extract suffix
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
        runner = CliRunner()
        # pretend to run in a terminal so progress display code paths run
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert not 'WARNING' in result.output
        assert not 'ERROR' in result.output
        # check the extracted file tree, as target-relative posix paths
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.resource_iter(test_tgt_dir, with_dirs=False)]
        assert sorted(expected) == sorted(file_result)
    finally:
        # always clean the tmp source dir created inside the source tree
        fileutils.delete(test_src_dir)
def display_extract_summary():
    """
    Display a summary of warnings and errors if any, then a colored
    'Extracting done.' line: red if errors, yellow if warnings, else green.
    """
    # Fixed: removed an unused `summary = []` accumulator that was never
    # read or appended to.
    has_warnings = False
    has_errors = False
    for xev in extract_result_with_errors:
        has_errors = has_errors or bool(xev.errors)
        has_warnings = has_warnings or bool(xev.warnings)
        source = fileutils.as_posixpath(xev.source)
        # decode non-unicode sources to displayable text
        if not isinstance(source, compat.unicode):
            source = toascii(source, translit=True).decode('utf-8', 'replace')
        source = get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir)
        for e in xev.errors:
            echo_stderr('ERROR extracting: %(source)s: %(e)s' % locals(), fg='red')
        for warn in xev.warnings:
            echo_stderr('WARNING extracting: %(source)s: %(warn)s' % locals(), fg='yellow')
    summary_color = 'green'
    if has_warnings:
        summary_color = 'yellow'
    if has_errors:
        summary_color = 'red'
    echo_stderr('Extracting done.', fg=summary_color, reset=True)
def resolve(path, posix=True):
    """
    Return a resolved relative POSIX path from `path` where extra slashes
    including leading and trailing slashes are removed, dot '.' and dotdot
    '..' path segments have been removed or resolved as possible. When a
    dotdot path segment cannot be further resolved and would be "escaping"
    from the provided path "tree", it is replaced by the string 'dotdot'.

    The `path` is treated as a POSIX path if `posix` is True (default) or as
    a Windows path with backslash separators otherwise.
    """
    is_unicode = isinstance(path, unicode)
    dot = is_unicode and u'.' or b'.'
    if not path:
        return dot
    path = path.strip()
    if not path:
        return dot
    # a path with no POSIX separators is handled as a Windows path
    if not is_posixpath(path):
        path = as_winpath(path)
        posix = False
    pathmod, path_sep = path_handlers(path, posix)
    path = path.strip(path_sep)
    segments = [s.strip() for s in path.split(path_sep) if s.strip()]
    # remove empty (// or ///) or blank (space only) or single dot segments
    segments = [s for s in segments if s and s != dot]
    path = path_sep.join(segments)
    # resolves . dot, .. dotdot
    path = pathmod.normpath(path)
    segments = path.split(path_sep)
    # remove empty or blank segments
    segments = [s.strip() for s in segments if s and s.strip()]
    # is this a windows absolute path? if yes strip the colon to make this relative
    if segments and len(segments[0]) == 2 and segments[0].endswith(':'):
        segments[0] = segments[0][:-1]
    # replace any remaining (usually leading) .. segment with a literal "dotdot"
    dotdot = is_unicode and u'dotdot' or b'dotdot'
    dd = is_unicode and u'..' or b'..'
    segments = [dotdot if s == dd else s for s in segments if s]
    if segments:
        path = path_sep.join(segments)
    else:
        path = dot
    # always return a POSIX form
    path = as_posixpath(path)
    return path
def __init__(self, cache_dir):
    """
    Set up the cache layout under cache_dir: an infos subdir, a scans subdir
    and a files log. Paths are bytes on Linux and unicode elsewhere.
    """
    if on_linux:
        self.cache_base_dir = path_to_bytes(cache_dir)
        infos_dir, scans_dir, files_log = b'infos_dir/', b'scans_dir/', b'files_log'
    else:
        self.cache_base_dir = cache_dir
        infos_dir, scans_dir, files_log = u'infos_dir/', u'scans_dir/', u'files_log'
    base = self.cache_base_dir
    self.cache_infos_dir = as_posixpath(os.path.join(base, infos_dir))
    self.cache_scans_dir = as_posixpath(os.path.join(base, scans_dir))
    self.cache_files_log = as_posixpath(os.path.join(base, files_log))
def _load_json_result(result_file, test_dir):
    """
    Load the result file as utf-8 JSON and strip test_dir prefix from
    locations. Sort the results by location.
    """
    test_dir = as_posixpath(test_dir)
    with codecs.open(result_file, encoding='utf-8') as res:
        scan_result = json.load(res, object_pairs_hook=OrderedDict)
    # rewrite each location as a test_dir-relative posix path
    for entry in scan_result['results']:
        entry['location'] = as_posixpath(entry['location']).replace(test_dir, '').strip('/')
    # drop the version so results compare across releases
    if scan_result.get('scancode_version'):
        del scan_result['scancode_version']
    scan_result['results'].sort(key=lambda entry: entry['location'])
    return scan_result
def test_is_ignored_skip_vcs_files_and_dirs(self):
    # Check that every VCS file and dir in the fixture is reported as
    # ignored by the default ignores, and that the top dir itself is not.
    test_dir = self.extract_test_tar('ignore/vcs.tgz')
    result = []
    for top, dirs, files in os.walk(test_dir, topdown=True):
        not_ignored = []
        for d in dirs:
            p = os.path.join(top, d)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            # test_dir-relative posix path for stable comparison
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((
                tp,
                ign,
            ))
            if not ign:
                not_ignored.append(d)
        # skip ignored things
        dirs[:] = not_ignored
        for f in files:
            p = os.path.join(top, f)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((
                tp,
                ign,
            ))
    expected = [
        ('/vcs', False),
        ('/vcs/.bzr', True),
        ('/vcs/.git', True),
        ('/vcs/.hg', True),
        ('/vcs/.repo', True),
        ('/vcs/.svn', True),
        ('/vcs/CVS', True),
        ('/vcs/_darcs', True),
        ('/vcs/_MTN', True),
        ('/vcs/.bzrignore', True),
        ('/vcs/.cvsignore', True),
        ('/vcs/.gitignore', True),
        ('/vcs/.hgignore', True),
        ('/vcs/.svnignore', True),
        ('/vcs/vssver.scc', True),
    ]
    assert sorted(expected) == sorted(result)
def test_skip_vcs_files_and_dirs(self):
    # Check that each VCS file and dir is ignored with the expected default
    # ignore message, and that the top dir itself is not ignored.
    test_dir = self.extract_test_tar('ignore/vcs.tgz')
    result = []
    for top, dirs, files in os.walk(test_dir, topdown=True):
        not_ignored = []
        for d in dirs:
            p = os.path.join(top, d)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            # test_dir-relative posix path for stable comparison
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((
                tp,
                ign,
            ))
            if not ign:
                not_ignored.append(d)
        # skip ignored things
        dirs[:] = not_ignored
        for f in files:
            p = os.path.join(top, f)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((
                tp,
                ign,
            ))
    expected = [
        ('/vcs', False),
        ('/vcs/.bzr', 'Default ignore: Bazaar artifact'),
        ('/vcs/.git', 'Default ignore: Git artifact'),
        ('/vcs/.hg', 'Default ignore: Mercurial artifact'),
        ('/vcs/.repo', 'Default ignore: Multiple Git repository artifact'),
        ('/vcs/.svn', 'Default ignore: SVN artifact'),
        ('/vcs/CVS', 'Default ignore: CVS artifact'),
        ('/vcs/_darcs', 'Default ignore: Darcs artifact'),
        ('/vcs/_MTN', 'Default ignore: Monotone artifact'),
        ('/vcs/.bzrignore', 'Default ignore: Bazaar config artifact'),
        ('/vcs/.cvsignore', 'Default ignore: CVS config artifact'),
        ('/vcs/.gitignore', 'Default ignore: Git config artifact'),
        ('/vcs/.hgignore', 'Default ignore: Mercurial config artifact'),
        ('/vcs/.svnignore', 'Default ignore: SVN config artifact'),
        ('/vcs/vssver.scc', 'Default ignore: Visual Source Safe artifact'),
    ]
    assert sorted(expected) == sorted(result)
def patch_text(ptch):
    """
    Return the patch text content as an iterable of lines given a ptch 'patch
    item'. The content is re-formatted as unified diff.
    """
    for head in ptch.header:
        yield head
    # unified diff file header lines
    yield '--- ' + fileutils.as_posixpath(ptch.source)
    yield '+++ ' + fileutils.as_posixpath(ptch.target)
    hunk_header = '@@ -%(startsrc)d,%(linessrc)d +%(starttgt)d,%(linestgt)d @@ %(desc)s'
    for hunk in ptch.hunks:
        yield hunk_header % hunk.__dict__
        for line in hunk.text:
            yield line
def patch_info(location):
    """
    Yield tuples of (src_path, target_path, patch_text) for each patch
    segment of a patch file at location. Raise an exception if the file is
    not a patch file or cannot be parsed.

    (Docstring fixed: this is a generator, it does not return a list.)
    """
    patchset = pythonpatch.fromfile(location)
    if not patchset:
        msg = 'Unable to parse patch file: %(location)s' % locals()
        raise ExtractErrorFailedToExtract(msg)
    for ptch in patchset.items:
        src = fileutils.as_posixpath(ptch.source.strip())
        tgt = fileutils.as_posixpath(ptch.target.strip())
        # drop empty lines from the re-formatted unified diff text
        text = [l.strip() for l in patch_text(ptch) if l]
        yield src, tgt, text
def patch_info(location):
    """
    Yield an iterable of tuples of (src_path, target_path, patch_text) for
    each patch segment of a patch file at location. Raise an exception if the
    file is not a patch file or cannot be parsed.
    """
    patchset = pythonpatch.fromfile(location)
    if not patchset:
        msg = 'Unable to parse patch file: %(location)s' % locals()
        raise ExtractErrorFailedToExtract(msg)
    for ptch in patchset.items:
        # posix-normalized source and target paths, plus the non-empty
        # stripped lines of the re-formatted diff text
        yield (
            fileutils.as_posixpath(ptch.source.strip()),
            fileutils.as_posixpath(ptch.target.strip()),
            [l.strip() for l in patch_text(ptch) if l],
        )
def test_scan_info_returns_full_root():
    """With --full-root every reported path starts with the full posix root."""
    test_dir = test_env.extract_test_tar('info/basic.tgz')
    result_file = test_env.get_temp_file('json')
    run_scan_click(['--info', '--full-root', test_dir, '--json', result_file])
    with open(result_file) as res:
        result_data = json.loads(res.read())
    file_paths = [entry['path'] for entry in result_data['files']]
    assert len(file_paths) == 12
    root = fileutils.as_posixpath(test_dir)
    assert all(path.startswith(root) for path in file_paths)
def __init__(self, cache_dir):
    """
    Initialize the cache layout under cache_dir: infos and scans subdirs and
    a files log. Bytes paths are used on Linux, unicode elsewhere.
    """
    if on_linux:
        self.cache_base_dir = path_to_bytes(cache_dir)
        names = (b'infos_dir/', b'scans_dir/', b'files_log')
    else:
        self.cache_base_dir = cache_dir
        names = (u'infos_dir/', u'scans_dir/', u'files_log')
    infos_dir, scans_dir, files_log = names
    self.cache_infos_dir = as_posixpath(
        os.path.join(self.cache_base_dir, infos_dir))
    self.cache_scans_dir = as_posixpath(
        os.path.join(self.cache_base_dir, scans_dir))
    self.cache_files_log = as_posixpath(
        os.path.join(self.cache_base_dir, files_log))
def test_fileutils_walk(self):
    """fileutils.walk yields (top, dirs, files) tuples in depth order."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    result = []
    for top, dirs, files in fileutils.walk(test_dir):
        # base-relative posix dir path; files sorted for stable comparison
        result.append((as_posixpath(top.replace(base, '')), dirs, sorted(files),))
    expected = [
        ('/walk', ['d1'], ['f', 'unicode.zip']),
        ('/walk/d1', ['d2'], ['f1']),
        ('/walk/d1/d2', ['d3'], ['f2']),
        ('/walk/d1/d2/d3', [], ['f3'])
    ]
    assert expected == result
def __init__(self, scan_cache_class, abs_path, base_is_dir, len_base_path):
    """
    Track one scanned resource: instantiate its cache, and compute its
    base-relative POSIX path (always Unicode) stored under infos['path'].
    """
    self.scan_cache_class = scan_cache_class()
    self.is_cached = False
    self.abs_path = abs_path
    self.base_is_dir = base_is_dir
    # keep the path relative to the original base_path
    self.rel_path = get_relative_path(
        as_posixpath(abs_path), len_base_path, base_is_dir)
    self.infos = OrderedDict()
    self.infos['path'] = self.rel_path
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return None
    expanded = os.path.expanduser(input_path)
    scanned_path = fileutils.as_posixpath(os.path.abspath(os.path.normpath(expanded)))
    # a file's root is its parent directory
    if filetype.is_dir(scanned_path):
        root_dir = scanned_path
    else:
        root_dir = fileutils.parent_directory(scanned_path)
    root_dir = fileutils.as_posixpath(root_dir)
    return root_dir if full_root else fileutils.file_name(root_dir)
def get_matches(path, patterns, all_matches=False):
    """
    Return a list of values (which are values from the matched `patterns`
    mapping of {pattern: value or message} if `path` is matched by any of
    the pattern from the `patterns` map or an empty list.
    If `all_matches` is False, stop and return on the first matched pattern.
    """
    if not path or not patterns:
        return False
    path = fileutils.as_posixpath(path).lower()
    pathstripped = path.lstrip(POSIX_PATH_SEP)
    if not pathstripped:
        return False
    segments = paths.split(pathstripped)
    if TRACE:
        logger.debug('_match: path: %(path)r patterns:%(patterns)r.' % locals())
    matches = []
    # a plain sequence of patterns is treated as a {pattern: pattern} mapping
    if not isinstance(patterns, dict):
        assert isinstance(
            patterns, (list, tuple)), 'Invalid patterns: {}'.format(patterns)
        patterns = {p: p for p in patterns}
    for pat, value in patterns.items():
        # skip empty or blank patterns
        if not pat or not pat.strip():
            continue
        value = value or EMPTY_STRING
        pat = pat.lstrip(POSIX_PATH_SEP).lower()
        is_plain = POSIX_PATH_SEP not in pat
        if is_plain:
            # plain patterns match any single path segment
            if any(fnmatch.fnmatchcase(s, pat) for s in segments):
                matches.append(value)
                if not all_matches:
                    break
        elif (fnmatch.fnmatchcase(path, pat)
              or fnmatch.fnmatchcase(pathstripped, pat)):
            # multi-segment patterns match the whole path
            matches.append(value)
            if not all_matches:
                break
    if TRACE:
        logger.debug('_match: matches: %(matches)r' % locals())
    if not all_matches:
        # single-match mode: return the first value, or False when no match
        if matches:
            return matches[0]
        else:
            return False
    return matches
def test_fileutils_walk(self):
    """fileutils.walk yields (top, dirs, files) tuples in depth order."""
    test_dir = self.get_test_loc("fileutils/walk")
    base = self.get_test_loc("fileutils")
    # Fixed: sort the file lists before comparing; OS directory listing
    # order is not guaranteed, which made this test flaky (the sibling
    # version of this test already sorts).
    result = [(as_posixpath(t.replace(base, "")), d, sorted(f))
              for t, d, f in fileutils.walk(test_dir)]
    expected = [
        ("/walk", ["d1"], ["f", "unicode.zip"]),
        ("/walk/d1", ["d2"], ["f1"]),
        ("/walk/d1/d2", ["d3"], ["f2"]),
        ("/walk/d1/d2/d3", [], ["f3"]),
    ]
    assert expected == result
def test_resource_iter_with_dir_only(self):
    """resource_iter with with_files=False yields only directories."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    found = [
        as_posixpath(loc.replace(base, ''))
        for loc in fileutils.resource_iter(test_dir, with_files=False, with_dirs=True)
    ]
    expected = [
        '/walk/d1',
        '/walk/d1/d2',
        '/walk/d1/d2/d3',
    ]
    assert sorted(expected) == sorted(found)
def test_scan_info_returns_full_root():
    """With --full-root every reported path starts with the full posix root."""
    test_dir = test_env.extract_test_tar('info/basic.tgz')
    result_file = test_env.get_temp_file('json')
    result = run_scan_click(['--info', '--full-root', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    with open(result_file, 'rb') as res:
        result_data = json.loads(res.read())
    file_paths = [entry['path'] for entry in result_data['files']]
    assert 11 == len(file_paths)
    root = fileutils.as_posixpath(test_dir)
    assert all(path.startswith(root) for path in file_paths)
def test_scan_info_returns_correct_full_root_with_single_file():
    """Scanning a single file with --full-root reports its full posix path."""
    test_file = test_env.get_test_loc('info/basic.tgz')
    result_file = test_env.get_temp_file('json')
    run_scan_click(['--info', '--full-root', test_file, '--json', result_file])
    with open(result_file) as res:
        files = json.loads(res.read())['files']
    # we have a single file
    assert len(files) == 1
    # and the path is the full path without repeating the file name
    assert files[0]['path'] == fileutils.as_posixpath(test_file)
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return None
    expanded = os.path.expanduser(input_path)
    scanned_path = fileutils.as_posixpath(
        os.path.abspath(os.path.normpath(expanded)))
    # a file's root is its parent directory
    root_dir = (scanned_path if filetype.is_dir(scanned_path)
                else fileutils.parent_directory(scanned_path))
    root_dir = fileutils.as_posixpath(root_dir)
    if full_root:
        return root_dir
    return fileutils.file_name(root_dir)
def test_scancode_skip_vcs_files_and_dirs_by_default(self):
    """VCS files and dirs are skipped: only the plain file gets scanned."""
    test_dir = self.extract_test_tar('ignore/vcs.tgz')
    output_json = self.get_temp_file('json')
    result = CliRunner().invoke(cli.scancode, ['--copyright', test_dir, output_json])
    assert result.exit_code == 0
    with open(output_json) as res:
        scan_result = json.load(res)
    # a single test.tst file that is not a VCS file should be listed
    assert 1 == scan_result['count']
    scan_loc = as_posixpath(scan_result['results'][0]['location'])
    assert scan_loc.endswith('vcs.tgz/vcs/test.txt')
def test_resource_iter_with_dirs(self):
    """resource_iter with with_dirs=True yields both files and directories."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    found = [
        as_posixpath(loc.replace(base, ''))
        for loc in fileutils.resource_iter(test_dir, with_dirs=True)
    ]
    expected = [
        '/walk/d1',
        '/walk/d1/d2',
        '/walk/d1/d2/d3',
        '/walk/d1/d2/d3/f3',
        '/walk/d1/d2/f2',
        '/walk/d1/f1',
        '/walk/f',
        '/walk/unicode.zip'
    ]
    assert sorted(expected) == sorted(found)
def test_file_iter(self):
    """file_iter yields every file under the tree and nothing else."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    found = []
    for loc in fileutils.file_iter(test_dir):
        found.append(as_posixpath(loc.replace(base, '')))
    expected = [
        '/walk/f',
        '/walk/unicode.zip',
        '/walk/d1/f1',
        '/walk/d1/d2/f2',
        '/walk/d1/d2/d3/f3'
    ]
    assert sorted(expected) == sorted(found)
def test_skip_vcs_files_and_dirs(self):
    # Check that each VCS file and dir is ignored with the expected default
    # ignore message, and that the top dir itself is not ignored.
    test_dir = self.extract_test_tar('ignore/vcs.tgz')
    result = []
    for top, dirs, files in os.walk(test_dir, topdown=True):
        not_ignored = []
        for d in dirs:
            p = os.path.join(top, d)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            # test_dir-relative posix path for stable comparison
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((tp, ign,))
            if not ign:
                not_ignored.append(d)
        # skip ignored things
        dirs[:] = not_ignored
        for f in files:
            p = os.path.join(top, f)
            ign = ignore.is_ignored(p, ignore.default_ignores, {})
            tp = fileutils.as_posixpath(p.replace(test_dir, ''))
            result.append((tp, ign,))
    expected = [
        ('/vcs', False),
        ('/vcs/.bzr', 'Default ignore: Bazaar artifact'),
        ('/vcs/.git', 'Default ignore: Git artifact'),
        ('/vcs/.hg', 'Default ignore: Mercurial artifact'),
        ('/vcs/.repo', 'Default ignore: Multiple Git repository artifact'),
        ('/vcs/.svn', 'Default ignore: SVN artifact'),
        ('/vcs/CVS', 'Default ignore: CVS artifact'),
        ('/vcs/_darcs', 'Default ignore: Darcs artifact'),
        ('/vcs/_MTN', 'Default ignore: Monotone artifact'),
        ('/vcs/.bzrignore', 'Default ignore: Bazaar config artifact'),
        ('/vcs/.cvsignore', 'Default ignore: CVS config artifact'),
        ('/vcs/.gitignore', 'Default ignore: Git config artifact'),
        ('/vcs/.hgignore', 'Default ignore: Mercurial config artifact'),
        ('/vcs/.svnignore', 'Default ignore: SVN config artifact'),
        ('/vcs/vssver.scc', 'Default ignore: Visual Source Safe artifact'),
    ]
    assert sorted(expected) == sorted(result)
def test_resource_iter_with_files_no_dir(self):
    """resource_iter with with_dirs=False yields files only."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = self.get_test_loc('fileutils')
    found = [
        as_posixpath(loc.replace(base, ''))
        for loc in fileutils.resource_iter(test_dir, with_files=True, with_dirs=False)
    ]
    expected = [
        '/walk/f',
        '/walk/unicode.zip',
        '/walk/d1/f1',
        '/walk/d1/d2/f2',
        '/walk/d1/d2/d3/f3'
    ]
    assert sorted(expected) == sorted(found)
def extract_event(item):
    """
    Display an extract event.
    """
    if not item:
        return ''
    if verbose:
        # verbose mode: show the relative source path; nothing once done
        if item.done:
            return ''
        source = as_posixpath(item.source)
        line = utils.get_relative_path(original_input, abs_input, source) or ''
    else:
        # terse mode: just the file name
        line = fileutils.file_name(item.source) or ''
    return 'Extracting: %(line)s' % locals()
def get_template(location):
    """
    Return a Jinja template object loaded from the file at `location`.
    """
    from jinja2 import Environment, FileSystemLoader

    location = as_posixpath(abspath(expanduser(location)))
    assert isfile(location)
    # load by name from the template's own directory
    loader = FileSystemLoader(parent_directory(location))
    return Environment(loader=loader).get_template(file_name(location))
def test_scan_info_returns_correct_full_root_with_single_file():
    """Scanning a single file with --full-root reports its full posix path."""
    test_file = test_env.get_test_loc('info/basic.tgz')
    result_file = test_env.get_temp_file('json')
    result = run_scan_click(['--info', '--full-root', test_file, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    with open(result_file, 'rb') as res:
        files = json.loads(res.read())['files']
    # we have a single file
    assert len(files) == 1
    # and the path is the full path without repeating the file name
    assert fileutils.as_posixpath(test_file) == files[0]['path']
def test_resource_iter_return_unicode_on_unicode_input(self):
    """A unicode base path makes resource_iter yield unicode paths."""
    test_dir = self.get_test_loc('fileutils/walk')
    base = unicode(self.get_test_loc('fileutils'))
    found = sorted(
        as_posixpath(loc.replace(base, ''))
        for loc in fileutils.resource_iter(test_dir, with_files=True, with_dirs=True)
    )
    expected = [
        u'/walk/d1',
        u'/walk/d1/d2',
        u'/walk/d1/d2/d3',
        u'/walk/d1/d2/d3/f3',
        u'/walk/d1/d2/f2',
        u'/walk/d1/f1',
        u'/walk/f',
        u'/walk/unicode.zip'
    ]
    assert sorted(expected) == sorted(found)
    assert all(isinstance(p, unicode) for p in found)
def test_walk_can_be_extended_while_walking(self):
    """Archives found while walking are extracted and their contents walked too."""
    test_dir = self.get_temp_dir()
    self.touch(os.path.join(test_dir, 'file'))
    self.touch(os.path.join(test_dir, 'arch.gz'))
    os.mkdir(os.path.join(test_dir, 'dir'))
    self.touch(os.path.join(test_dir, 'dir', 'otherarch.gz'))
    allpaths = []
    for top, dirs, files in self.extract_walker(test_dir):
        for entry in dirs + files:
            rel = os.path.join(top, entry).replace(test_dir, '')
            allpaths.append(as_posixpath(rel))
    expected = [
        '/arch.gzextract/extracted_file',
        '/dir',
        '/arch.gz',
        '/file',
        '/dir/otherarch.gzextract/extracted_file',
        '/dir/otherarch.gz'
    ]
    assert sorted(expected) == sorted(allpaths)
def safe_path(path, lowered=True, resolved=True):
    """
    Convert a path-like string `path` to a posix path string safer to use as
    a file path on all OSes. The path is lowercased. Non-ASCII alphanumeric
    characters and spaces are replaced with an underscore. The path is
    optionally resolved and lowercased.
    """
    safe = path.strip()
    # TODO: replace COM/PRN/LPT windows special names
    # TODO: resolve 'UNC' windows paths
    # TODO: strip leading windows drives
    # drop unsafe chars, then transliterate to ASCII and posix-normalize
    safe = text.toascii(safe.translate(path_safe))
    safe = fileutils.as_posixpath(safe)
    if lowered:
        safe = safe.lower()
    if resolved:
        safe = resolve(safe)
    return safe