Esempio n. 1
0
def get_files_in_directory(path, file_type):
    """
    Collect every non-ignored file below ``path`` (recursively).

    Ignore patterns are obtained from ``RussellIgnoreManager.get_list()``
    (per the original documentation these come from a ``.russellignore``
    file, if one is present). Every pattern is applied both as given and
    with a ``/**`` suffix so that the contents of an ignored directory are
    excluded as well.

    Returns a 3-tuple:
      * a list of ``(file_type, (relative_path, open_binary_handle,
        'text/plain'))`` entries, where ``relative_path`` always uses ``/``
        as separator,
      * the accumulated size formatted by ``sizeof_fmt``,
      * the accumulated size in bytes.

    The file handles inside the returned list are left open.
    NOTE(review): presumably the caller consumes and closes them - confirm.
    """
    # NOTE(review): the whitelist returned by the manager is not consulted
    # anywhere in this function.
    ignore_list, _whitelist = RussellIgnoreManager.get_list()

    # A pattern "foo" must also exclude everything underneath "foo".
    patterns = ignore_list + ["{}/**".format(item) for item in ignore_list]
    russell_logger.debug("Ignoring list : {}".format(ignore_list))

    def _is_ignored(candidate):
        # True as soon as one ignore pattern matches the normalized path.
        return any(PurePath(candidate).match(pattern) for pattern in patterns)

    collected = []
    size_in_bytes = 0

    for root, dirs, files in os.walk(path):
        russell_logger.debug("Root:{}, Dirs:{}".format(root, dirs))

        if _is_ignored(normalize_path(path, root)):
            # Emptying dirs in place stops os.walk from descending further.
            dirs[:] = []
            russell_logger.debug("Ignoring directory : {}".format(root))
            continue

        for file_name in files:
            walked_path = os.path.join(root, file_name)
            normalized = normalize_path(path, walked_path)
            if _is_ignored(normalized):
                russell_logger.debug(
                    "Ignoring file : {}".format(normalized))
                continue

            # Relative paths are always reported Unix style.
            if os.path.sep != '/':
                reported_path = walked_path.replace(os.path.sep, '/')
            else:
                reported_path = walked_path

            absolute_path = os.path.join(os.getcwd(), root, file_name)
            # Deliberately not a ``with`` block: the handle is handed back.
            handle = open(absolute_path, 'rb')
            collected.append(
                (file_type, (reported_path, handle, 'text/plain')))
            size_in_bytes += os.path.getsize(absolute_path)

    return (collected, sizeof_fmt(size_in_bytes), size_in_bytes)
Esempio n. 2
0
def glob_files(root_dir, includes=None, excludes=None, gitignore=None):
    """Search files below ``root_dir`` using include/exclude glob patterns.

    :param root_dir: directory where the search starts
    :param includes: list or iterator of glob pattern strings; when empty or
        omitted, ``'**'`` (everything) is used
    :param excludes: list or iterator of patterns; a file matching any of
        them (via ``PurePath.match``) is skipped
    :param gitignore: ignore patterns in gitwildmatch format, compiled with
        ``pathspec``
    :return: generator of ``(str(found_path), path_relative_to_root_dir)``
        tuples; directories are never yielded

    Include patterns are processed in order. A file that also matches one of
    the include patterns still waiting to be processed is skipped for the
    current pattern, so it is reported by the last pattern that matches it.
    """
    # pathlib documentation: https://docs.python.org/3/library/pathlib.html
    # The pattern collections are walked several times, so turn possible
    # one-shot iterators into lists up front.
    includes = list(includes) if includes else ['**']
    if excludes:
        excludes = list(excludes)

    if gitignore:
        spec = pathspec.PathSpec.from_lines('gitwildmatch', gitignore)
        log.debug('gitignore patterns: %s', gitignore)

    while includes:
        current = includes.pop(0)
        # Mirror the recursive behaviour of the standard glob module, where
        # a trailing '**' matches any files in zero or more directories.
        if current.endswith('**'):
            current += '/*'

        for found in list(Path(root_dir).glob(current)):
            if found.is_dir():
                continue

            # Background on matching glob patterns against plain strings:
            # http://stackoverflow.com/questions/27726545/python-glob-but-against-a-list-of-strings-rather-than-the-filesystem
            pure = PurePath(found)

            # Leave the file to a later include pattern (last one wins).
            if any(pure.match(later) for later in includes):
                continue

            # Drop files hit by an exclude pattern.
            if excludes and any(pure.match(unwanted) for unwanted in excludes):
                continue

            # Drop files hit by the gitignore specification.
            if gitignore and spec.match_file(str(found)):
                log.debug('Skipped file \'%s\' due to gitignore pattern',
                          str(found.relative_to(root_dir)))
                continue

            yield (str(found), str(found.relative_to(root_dir)))
Esempio n. 3
0
def path2unix(path, nojoin=False, fromwinpath=False):
    """Convert a path given in any format to POSIX ('/'-separated) form.

    :param path: the path to convert
    :param nojoin: when True, return the list of path components instead of
        a joined string
    :param fromwinpath: when True, force the input to be parsed as a Windows
        path (useful on Unix machines to unit test Windows paths)
    :return: the list of components if ``nojoin`` is True, otherwise the
        components joined with ``posixpath.join``; an empty path yields an
        empty list or an empty string respectively
    """
    path_cls = PureWindowsPath if fromwinpath else PurePath
    pathparts = list(path_cls(path).parts)
    if nojoin:
        return pathparts
    if not pathparts:
        # An empty path has no components, and posixpath.join() needs at
        # least one argument - return the empty path instead of raising.
        return ''
    return posixpath.join(*pathparts)
Esempio n. 4
0
def matches_glob_list(path, glob_list):
    """
    Tell whether ``path`` matches at least one glob pattern of ``glob_list``.

    Matching is done with ``PurePath.match``. A pattern/path combination
    that makes the match raise ``TypeError`` is treated as "no match"
    instead of propagating the error. Returns a boolean.
    """
    def _matches(pattern):
        try:
            return PurePath(path).match(pattern)
        except TypeError:
            # Unusable path or pattern type: count it as a non-match.
            return False

    return any(_matches(pattern) for pattern in glob_list)
Esempio n. 5
0
def create_html_index(target_dir, parent_dir, pages):
    """Build an HTML directory listing and return it as an indented string.

    The document has a heading naming ``/<target_dir>``, followed by a
    one-column table: the first row links to ``/<parent_dir>`` labelled
    'Parent Directory', then one row per entry of ``pages`` linking to
    ``/<page>`` and labelled with the final path component of the page.
    Lines of the result are separated by CRLF.
    """
    doc, tag, text = Doc().tagtext()
    doc.asis('<!DOCTYPE html>')

    with tag('html'):
        with tag('style', type="text/css"):
            text('table { border-collapse: collapse } ')
            text('table, th, td { border: 1px solid black; padding: 4px } ')

        with tag('body'):
            with tag('h1'):
                text('Directory Listing for /' + target_dir)
            doc.stag('hr')

            with tag('table'):
                # First row: link back up to the parent directory.
                with tag('tr'), tag('td'), tag('a', href='/' + parent_dir):
                    text('Parent Directory')

                # One row per page, labelled with the page's base name.
                for page in pages:
                    label = PurePath(page).name
                    with tag('tr'), tag('td'), tag('a', href='/' + page):
                        text(label)

    html = doc.getvalue()
    return indent(html, newline='\r\n')
Esempio n. 6
0
def path_splitter(flat_key):
    """Split ``flat_key`` into its path components and return them as a tuple."""
    return PurePath(flat_key).parts
Esempio n. 7
0
def get_slug(path):
    """Return the slug for ``path``: the name of its parent directory.

    The site root page ``'site/index.html'`` is special-cased and maps to
    the empty string.
    """
    if path == 'site/index.html':
        return ''
    containing_dir = PurePath(path).parent
    return str(containing_dir.name)
Esempio n. 8
0
def get_class(img_path):
    """Return the second-to-last component of ``img_path`` converted to int.

    That component is the directory directly containing the file, e.g.
    ``'data/7/img.png'`` gives ``7``.
    """
    components = PurePath(img_path).parts
    containing_dir = components[-2]
    return int(containing_dir)