def get_files_in_directory(path, file_type):
    """
    Walk ``path`` and collect every file that is not ignored.

    Ignore patterns come from ``RussellIgnoreManager.get_list()``; each
    pattern is additionally tried with a ``/**`` suffix. A directory whose
    normalized path matches is pruned from the walk, and a file whose
    normalized path matches is skipped. The second value returned by
    ``get_list()`` is not used here.

    Returns a 3-tuple ``(entries, formatted_size, total_bytes)``:

    * ``entries`` is a list of
      ``(file_type, (relative_path, open_binary_handle, 'text/plain'))``
      where ``relative_path`` uses ``/`` separators. The handles are
      returned open; this function does not close them.
    * ``formatted_size`` is ``sizeof_fmt(total_bytes)``.
    * ``total_bytes`` is the summed ``os.path.getsize`` of collected files.
    """
    patterns, _unused_whitelist = RussellIgnoreManager.get_list()
    # Also try every pattern with "/**" appended so that the contents of an
    # ignored directory are excluded as well.
    expanded_patterns = patterns + ["{}/**".format(p) for p in patterns]
    russell_logger.debug("Ignoring list : {}".format(patterns))

    def is_ignored(candidate):
        # True as soon as one expanded pattern matches the candidate path.
        return any(PurePath(candidate).match(p) for p in expanded_patterns)

    needs_unix_separators = os.path.sep != '/'
    entries = []
    byte_count = 0

    for root, dirs, files in os.walk(path):
        russell_logger.debug("Root:{}, Dirs:{}".format(root, dirs))

        if is_ignored(normalize_path(path, root)):
            # Emptying dirs in place stops os.walk from descending further.
            dirs[:] = []
            russell_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            joined = os.path.join(root, file_name)
            normalized = normalize_path(path, joined)
            if is_ignored(normalized):
                russell_logger.debug(
                    "Ignoring file : {}".format(normalized))
                continue

            # Relative paths are always reported Unix style.
            if needs_unix_separators:
                relative = joined.replace(os.path.sep, '/')
            else:
                relative = joined
            absolute = os.path.join(os.getcwd(), root, file_name)

            entries.append(
                (file_type, (relative, open(absolute, 'rb'), 'text/plain')))
            byte_count += os.path.getsize(absolute)

    return (entries, sizeof_fmt(byte_count), byte_count)
def glob_files(root_dir, includes=None, excludes=None, gitignore=None):
    """Search files below a directory using include/exclude patterns.

    :param root_dir: directory where the search starts
    :param includes: list or iterator of glob pattern strings; when empty or
        omitted, ``'**'`` (everything, recursively) is used. A file matched
        by several include patterns is reported only for the last of them.
    :param excludes: list or iterator of patterns; a file matching any of
        them (via ``PurePath.match``) is dropped
    :param gitignore: list of ignore patterns (gitwildmatch format)
    :return: iterator of ``(path, relative_path)`` string pairs, where
        ``path`` is the match as produced by ``Path(root_dir).glob`` and
        ``relative_path`` is that path relative to ``root_dir``
    """
    # Materialize both pattern collections: they are walked several times,
    # which a one-shot iterator would not survive.
    include_patterns = list(includes) if includes else ['**']
    exclude_patterns = list(excludes) if excludes else []

    ignore_spec = None
    if gitignore:
        ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', gitignore)
        log.debug('gitignore patterns: %s', gitignore)

    for position, raw_pattern in enumerate(include_patterns):
        later_patterns = include_patterns[position + 1:]

        # Same convention as the standard glob module in recursive mode:
        # a trailing '**' should match files, not only directories.
        if raw_pattern.endswith('**'):
            pattern = raw_pattern + '/*'
        else:
            pattern = raw_pattern

        # Snapshot the matches before yielding anything.
        candidates = list(Path(root_dir).glob(pattern))
        for found in candidates:
            if found.is_dir():
                continue

            pure = PurePath(found)

            # Last include wins: leave the file to a later include pattern
            # that also matches it.
            if any(pure.match(p) for p in later_patterns):
                continue

            if any(pure.match(p) for p in exclude_patterns):
                continue

            # NOTE(review): the spec is matched against the root_dir-prefixed
            # path, not the path relative to root_dir -- confirm intended.
            if ignore_spec is not None and ignore_spec.match_file(str(found)):
                log.debug('Skipped file \'%s\' due to gitignore pattern',
                          str(found.relative_to(root_dir)))
                continue

            yield (str(found), str(found.relative_to(root_dir)))
def path2unix(path, nojoin=False, fromwinpath=False):
    '''Convert a path given in any format to posix path format.

    path: the path to convert.
    nojoin: when True, return the list of path components instead of a
        joined string.
    fromwinpath: when True, force the input to be parsed as a Windows path
        (useful on Unix machines to unit test Windows paths); otherwise the
        platform's native flavour is used.

    Returns the components joined with ``/``, or the component list when
    nojoin is True. A path without any component (``''`` or ``'.'``) yields
    ``''`` (``[]`` with nojoin) instead of raising.
    '''
    flavour = PureWindowsPath if fromwinpath else PurePath
    pathparts = list(flavour(path).parts)
    if nojoin:
        return pathparts
    if not pathparts:
        # posixpath.join() requires at least one argument; '' and '.' parse
        # to zero parts and used to crash here with a TypeError.
        return ''
    return posixpath.join(*pathparts)
def matches_glob_list(path, glob_list): """ Given a list of glob patterns, returns a boolean indicating if a path matches any glob in the list """ for glob in glob_list: try: if PurePath(path).match(glob): return True except TypeError: pass return False
def create_html_index(target_dir, parent_dir, pages):
    """Build an HTML directory-listing page and return it as a string.

    The page shows a heading ``Directory Listing for /<target_dir>`` and a
    one-column table: the first row links to ``/<parent_dir>`` labelled
    ``Parent Directory``; every following row links to ``/<page>`` and is
    labelled with the final path component of that page.

    The markup is produced through ``Doc().tagtext()`` (presumably yattag --
    confirm against the file's imports) and passed through ``indent`` with
    CRLF newlines before being returned.
    """
    doc, tag, text = Doc().tagtext()

    def link_row(href, label):
        # One table row containing a single cell with one hyperlink.
        with tag('tr'), tag('td'), tag('a', href=href):
            text(label)

    css_rules = (
        'table { border-collapse: collapse } ',
        'table, th, td { border: 1px solid black; padding: 4px } ',
    )

    doc.asis('<!DOCTYPE html>')
    with tag('html'):
        with tag('style', type="text/css"):
            for rule in css_rules:
                text(rule)
        with tag('body'):
            with tag('h1'):
                text('Directory Listing for /' + target_dir)
            doc.stag('hr')
            with tag('table'):
                link_row('/' + parent_dir, 'Parent Directory')
                for page in pages:
                    label = PurePath(page).name
                    link_row('/' + page, label)

    return indent(doc.getvalue(), newline='\r\n')
def path_splitter(flat_key):
    """Split ``flat_key`` into its path components.

    Returns the ``parts`` tuple of ``PurePath(flat_key)``.
    """
    return PurePath(flat_key).parts
def get_slug(path):
    """Return the slug for ``path``: the name of its parent directory.

    The exact path ``'site/index.html'`` is special-cased and maps to the
    empty string.
    """
    if path == 'site/index.html':
        return ''
    containing_dir = PurePath(path).parent
    return str(containing_dir.name)
def get_class(img_path):
    """Return the second-to-last component of ``img_path`` as an ``int``.

    For a path shaped like ``<...>/<label>/<file>`` this is ``int(label)``.
    """
    components = PurePath(img_path).parts
    label = components[-2]
    return int(label)