def _filter_by_types(filenames, types, exclude_types):
    """Select the filenames whose tags satisfy the type constraints.

    A filename is kept when the tags reported by ``tags_from_path`` contain
    every entry of ``types`` and share no entry with ``exclude_types``.

    :param filenames: iterable of paths to inspect.
    :param types: tags that must all be present.
    :param exclude_types: tags of which none may be present.
    :return: the kept filenames, in input order, as a tuple.
    """
    required = frozenset(types)
    forbidden = frozenset(exclude_types)

    def _matches(tags):
        # Superset test for the required tags, empty intersection for the
        # forbidden ones.
        return tags >= required and not tags & forbidden

    return tuple(
        name for name in filenames if _matches(tags_from_path(name))
    )
def test_tags_from_path_plist_binary(tmpdir):
    """A ``.plist`` file with binary content gets the ``binary`` tag."""
    plist_bytes = (
        b'bplist00\xd1\x01\x02_\x10\x0fLast Login NameWDefault\x08\x0b\x1d\x00'
        b'\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00'
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00%'
    )
    plist_file = tmpdir.join('t.plist')
    plist_file.write_binary(plist_bytes)

    tags = identify.tags_from_path(plist_file.strpath)

    assert tags == {'file', 'plist', 'binary', 'non-executable'}
def test_tags_from_path_file_with_shebang_executable(tmpdir):
    """An executable file starting with a python shebang is tagged python."""
    script = tmpdir.join('test')
    script.write_text('#!/usr/bin/env python\nimport sys\n', encoding='UTF-8')
    make_executable(script.strpath)

    tags = identify.tags_from_path(script.strpath)

    assert tags == {'file', 'text', 'executable', 'python'}
def __identify_tags(self):
    """
    Look up the file type tags of ``self.__file_path__``.

    The tags are obtained from ``identify.tags_from_path``.

    :return: ``(True, tags)`` when the lookup succeeds, or
        ``(False, 'No such file or directory.')`` when it raises
        ``ValueError``.
    """
    try:
        tags = identify.tags_from_path(self.__file_path__)
    except ValueError:
        return False, 'No such file or directory.'
    else:
        return True, tags
def discovery(
    path: str,
    tags_to_find: List[str],
) -> List[str]:
    """Collect the paths at ``path`` that carry at least one wanted tag.

    When ``path`` is a directory, its direct entries are inspected (there is
    no recursion); otherwise ``path`` itself is inspected.

    :param path: directory or file to look at.
    :param tags_to_find: tags of interest; a single match is enough.
    :return: the matching paths, each listed once, in inspection order.
    :raises ValueError: propagated from ``identify.tags_from_path`` when an
        inspected path does not exist.
    """
    wanted = set(tags_to_find)

    if os.path.isdir(path):
        candidates = [os.path.join(path, item) for item in os.listdir(path)]
    else:
        candidates = [path]

    # A path is reported once even when several of the wanted tags apply to
    # it; appending once per matching tag produced duplicate entries.
    return [
        candidate
        for candidate in candidates
        if not wanted.isdisjoint(identify.tags_from_path(candidate))
    ]
def convert(path_from_root: str):
    """Convert file to a TOML config.

    Only files tagged ``yaml`` by ``identify.tags_from_path`` are handled.
    The reformatted TOML is printed and also returned.

    :param path_from_root: path of the file to convert.
    :return: ``reformatted`` of the ``TomlDoc`` built from the file.
    :raises NotImplementedError: when the file is not tagged ``yaml``.
    """
    tags = identify.tags_from_path(path_from_root)
    if "yaml" not in tags:
        raise NotImplementedError(f"No conversion for these types: {tags}")

    yaml_doc = YamlDoc(path=path_from_root)
    toml_doc = TomlDoc(
        obj={path_from_root: yaml_doc.as_object},
        use_tomlkit=True,
    )
    print(toml_doc.reformatted)
    return toml_doc.reformatted
def _filenames(
        files_re: Pattern[str],
        exclude_re: Pattern[str],
        tests_re: Pattern[str],
) -> Generator[Tuple[str, bool], None, None]:
    """Yield ``(filename, is_test)`` for the python files git lists.

    The candidates are the paths printed by ``git ls-files`` in the current
    working directory.  A path is yielded when it matches ``files_re``, does
    not match ``exclude_re`` and ``tags_from_path`` tags it ``python``.

    :param files_re: pattern a filename must match (``search``).
    :param exclude_re: pattern a filename must not match (``search``).
    :param tests_re: pattern deciding the boolean of each yielded pair.
    :raises subprocess.CalledProcessError: when ``git ls-files`` fails.
    """
    # -z makes git terminate each path with NUL and emit it unquoted, so
    # paths that git would otherwise quote survive intact.
    out = subprocess.check_output(('git', 'ls-files', '-z')).decode()
    for filename in out.split('\0'):
        if not filename:
            # The output ends with a NUL, which leaves one empty trailing
            # field after the split.
            continue
        if (
                not files_re.search(filename) or
                exclude_re.search(filename) or
                'python' not in tags_from_path(filename)
        ):
            continue

        yield filename, bool(tests_re.search(filename))
def get_file_list(directory: str) -> list:
    """Return the sorted paths of digit-named text files in ``directory``.

    Only direct entries of ``directory`` are considered.  An entry is kept
    when ``identify.tags_from_path`` tags it ``text`` and its name consists
    of digits only.

    :param directory: directory to scan.
    :return: sorted list of matching paths; an empty list when ``directory``
        is not a directory.
    """
    if not os.path.isdir(directory):
        return []

    file_list = []
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        tags = identify.tags_from_path(item_path)
        # file names must be digits only
        if "text" in tags and item.isdigit():
            file_list.append(item_path)

    # NOTE(review): this sorts the path strings, so "10" comes before "2";
    # confirm whether numeric ordering was intended.
    file_list.sort()
    return file_list
def test_tags_from_path_plist_text(tmpdir):
    """A ``.plist`` file with XML content gets the ``text`` tag."""
    plist_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'  # noqa: E501
        '<plist version="1.0">\n'
        '<dict>\n'
        '\t<key>Last Login Name</key>\n'
        '\t<string>Default</string>\n'
        '</dict>\n'
        '</plist>\n'
    )
    plist_file = tmpdir.join('t.plist')
    plist_file.write(plist_xml)

    tags = identify.tags_from_path(plist_file.strpath)

    assert tags == {'file', 'plist', 'text', 'non-executable'}
def all_filenames_in_dir(path=None, ignore_paths=None):
    """Collect the text files found while walking ``path``.

    :param path: directory to walk; defaults to the current working
        directory.
    :param ignore_paths: directory names to prune from the walk; also passed
        to ``should_ignore_path`` for every visited directory.  ``None`` is
        treated as "nothing to ignore".
    :return: set of full paths that ``identify.tags_from_path`` tags
        ``text``.
    """
    path = path or os.getcwd()
    # The default of None used to reach the pruning loop and raise TypeError.
    ignore_paths = ignore_paths or []
    # http://stackoverflow.com/a/2186565
    paths = set()
    for root, dirnames, filenames in os.walk(path, topdown=True):
        # Prune in place so os.walk does not descend into ignored
        # directories.  The try sits inside the loop: one name missing from
        # this level must not stop the remaining names from being pruned.
        for ignore in ignore_paths:
            try:
                dirnames.remove(ignore)
            except ValueError:
                pass
        if should_ignore_path(root, ignore_paths):
            continue
        for filename in filenames:
            full_path = os.path.join(root, filename)
            if "text" in identify.tags_from_path(full_path):
                paths.add(full_path)
    return paths
def filesystem_tree(same_filesystem, paths):
    """Echo the summed ``getsize`` of the files under ``paths`` per tag set.

    Every file found by walking each path is grouped by the tags that
    ``tags_from_path`` reports for it, and one ``<tags>: <size>`` line is
    echoed per distinct tag combination.

    :param same_filesystem: when true, directories for which ``ismount`` is
        true are not descended into.
    :param paths: directories to walk; ``["."]`` is used when empty.
    """
    if not paths:
        paths = ["."]

    type_sizes = defaultdict(int)
    for path in paths:
        for root, dirs, files in walk(path):
            if same_filesystem:
                # Prune in place so walk() skips mount points.
                dirs[:] = [d for d in dirs if not ismount(join(root, d))]
            for name in files:
                full_path = join(root, name)
                # The tags come back as a set, which is unhashable and so
                # cannot be a dict key; a sorted tuple is hashable and gives
                # a deterministic label.
                filetype = tuple(sorted(tags_from_path(full_path)))
                type_sizes[filetype] += getsize(full_path)

    for filetype, size in type_sizes.items():
        click.echo(f"{', '.join(filetype)}: {size}")
def run_per_file(config, ignore_paths=None, path=None, config_dir=None):
    """Run the configured command once per matching text file.

    The files come from ``all_filenames_in_dir`` and are matched against the
    ``PATTERNS`` entry for ``config["language"]``.  Each command is the list
    returned by ``run_config`` with the file path appended, and is executed
    through ``run_command`` with a timeout of 5.

    :param config: mapping read for the ``language`` and ``concurrency``
        keys and passed on to ``run_config``.
    :param ignore_paths: forwarded to ``all_filenames_in_dir``; defaults to
        an empty list.
    :param path: directory to scan; defaults to the current working
        directory.
    :param config_dir: forwarded to ``run_config``.
    :return: list of ``(filepath, stripped output)`` pairs, one per command.
    """
    ignore_paths = ignore_paths or []
    path = path or os.getcwd()
    cmd = run_config(config, config_dir)
    # NOTE(review): a language missing from PATTERNS yields None here and the
    # loop below raises TypeError -- confirm whether that is intended.
    patterns = PATTERNS.get(config.get("language"))
    concurrency = config.get("concurrency")
    paths = all_filenames_in_dir(path=path, ignore_paths=ignore_paths)

    run_cmds = []
    for pattern in patterns:
        for filepath in fnmatch.filter(paths, pattern):
            if "text" in identify.tags_from_path(filepath):
                run_cmds.append(cmd + [filepath])

    def result(run_cmd):
        _, out = run_command(run_cmd, timeout=5)
        # The file path is always the last element of the command.
        return run_cmd[-1], out.strip()

    # The pool was previously never released; the context manager tears the
    # workers down once map() has returned every result.
    with Pool(processes=concurrency) as pool:
        return pool.map(result, run_cmds)
def all_filenames_in_dir(self, changed_filenames=None):
    """Collect the text files below the current working directory.

    Directory names listed in ``self.ignore_paths`` are pruned from the walk
    and directories rejected by ``self.should_ignore_path`` are skipped.
    When ``changed_filenames`` is non-empty, a file is kept only if its
    normalised relative path is contained in it.  The number of collected
    paths is printed before returning.

    :param changed_filenames: optional collection of relative paths using
        ``/`` as separator.
    :return: set of full paths tagged ``text`` by
        ``identify.tags_from_path``.
    """
    # http://stackoverflow.com/a/2186565
    changed_filenames = changed_filenames or set()
    paths = set()
    for root, dirnames, filenames in os.walk(os.getcwd(), topdown=True):
        # Prune in place so os.walk does not descend into ignored names.
        for ignore in self.ignore_paths:
            if ignore in dirnames:
                dirnames.remove(ignore)

        if self.should_ignore_path(root):
            continue

        for filename in filenames:
            full_path = os.path.join(root, filename)
            if "text" not in identify.tags_from_path(full_path):
                continue
            if changed_filenames:
                relative = os.path.relpath(os.path.normcase(full_path))
                relative = relative.replace("\\", "/").strip()
                if relative not in changed_filenames:
                    continue
            paths.add(full_path)

    print("Filenames in dir count: {}".format(len(paths)))
    return paths
def _types_for_file(self, filename: str) -> Set[str]:
    """Return the tags that ``tags_from_path`` reports for ``filename``."""
    tags = tags_from_path(filename)
    return tags
def test_tags_from_path_simple_file(tmpdir):
    """An empty ``test.py`` is tagged as a non-executable python text file."""
    py_file = tmpdir.join('test.py').ensure()

    tags = identify.tags_from_path(py_file.strpath)

    assert tags == {'file', 'text', 'non-executable', 'python'}
def test_tags_from_path_directory(tmpdir):
    """A directory is tagged ``directory`` and nothing else."""
    directory = tmpdir.join('foo')
    directory.mkdir()

    tags = identify.tags_from_path(directory.strpath)

    assert tags == {'directory'}
def test_tags_from_path_broken_symlink(tmpdir):
    """A symlink whose target was never created is tagged ``symlink``."""
    link = tmpdir.join('foo')
    missing_target = tmpdir.join('lol')
    link.mksymlinkto(missing_target)

    tags = identify.tags_from_path(link.strpath)

    assert tags == {'symlink'}
def test_tags_from_path_does_not_exist(tmpdir):
    """A path that was never created makes ``tags_from_path`` raise."""
    missing_path = tmpdir.join('foo').strpath

    with pytest.raises(ValueError):
        identify.tags_from_path(missing_path)
def _types_for_file(self, filename):
    """Return the tags for ``filename``, memoised in ``self._types_cache``.

    On a cache miss the tags are computed with ``tags_from_path`` and stored
    under ``filename`` before being returned.
    """
    cache = self._types_cache
    try:
        tags = cache[filename]
    except KeyError:
        tags = cache[filename] = tags_from_path(filename)
    return tags
def apply_fix(*, ls_files_cmd, sed_cmd):
    """Run ``sed_cmd`` over the files listed by ``ls_files_cmd``.

    The output of ``ls_files_cmd`` is split with ``zsplit`` and decoded.
    Paths whose tags share at least one entry with ``{'file', 'text'}`` are
    appended to ``sed_cmd``, and the result is handed to
    ``autofix_lib.run``.

    :param ls_files_cmd: command passed to ``subprocess.check_output``.
    :param sed_cmd: leading arguments for ``autofix_lib.run``.
    """
    # NOTE(review): ``&`` accepts a path carrying either tag, not both --
    # confirm that this is the intended filter.
    wanted_tags = {'file', 'text'}

    listing = subprocess.check_output(ls_files_cmd)
    decoded = [raw.decode() for raw in zsplit(listing)]

    selected = []
    for name in decoded:
        if tags_from_path(name) & wanted_tags:
            selected.append(name)

    autofix_lib.run(*sed_cmd, *selected)
def file_tags(filename):
    """Return the tags ``identify.tags_from_path`` gives ``filename`` as a list."""
    tags = identify.tags_from_path(filename)
    return list(tags)