def yield_files_from_tree(ctx):
    """
    Yields the full path of every file to process found under the supplied directories.

    Each entry of ``ctx.directory`` is walked with ``utils.walk``. The files listed for a directory
    whose path contains ``/files/`` are ignored. A file is selected if ``os.path.isfile`` accepts
    its full path and if its name matches the ``ctx.filter`` regular expression (``re.match``,
    i.e. anchored at the beginning of the filename).

    :param esgprep.checkvocab.main.ProcessingContext ctx: The processing context
    :returns: The full path of each selected file
    :rtype: *iter*

    """
    for top in ctx.directory:
        for dirpath, _, names in utils.walk(top, downstream=True, followlinks=True):
            # Ignore the content of any location having "/files/" in its path
            if "/files/" in dirpath:
                continue
            for name in names:
                candidate = os.path.join(dirpath, name)
                # Discard anything that is not an existing file (e.g., broken symlinks)
                if not os.path.isfile(candidate):
                    continue
                # The filter applies on the filename only, not on the full path
                if re.match(ctx.filter, name) is None:
                    continue
                yield candidate
def yield_inputs(ctx):
    """
    Yields all files to process within tuples with the processing context.

    For each entry of ``ctx.directory``, the ``ctx.pattern`` regex is cut before its ``<filename>``
    group and anchored at the end of the string. As long as that regex declares a ``version`` group,
    it is searched in the directory path and, on failure, shortened by its last "/"-separated level.
    On the first success, the version found in the directory path overrides the context:

     * ``latest`` sets ``ctx.latest`` to True and resets ``ctx.version`` to None,
     * any other value is stored into ``ctx.version`` and sets ``ctx.latest`` to False,
     * ``ctx.all`` is reset to None in both cases.

    The context is left untouched if no version can be deduced from the directory path.
    The same context object is modified in place and attached to every yielded file.

    The directory is then walked with ``utils.walk``. The files listed for a directory whose path
    contains ``/files/`` are ignored. A file is selected if ``os.path.isfile`` accepts its full path
    and if its name matches the ``ctx.filter`` regular expression.

    :param esgprep.mapfile.main.ProcessingContext ctx: The processing context
    :returns: Attach the processing context to a file to process as an iterator of tuples
    :rtype: *iter*

    """
    for top in ctx.directory:
        # Directory part of the pattern only: drop the <filename> group and anchor at the end
        dir_regex = re.compile(ctx.pattern.split('/(?P<filename>')[0] + '$')
        # Look for a version number in the supplied directory path
        while 'version' in dir_regex.groupindex:
            found = dir_regex.search(top)
            if found is None:
                # Retry with one path level less (the trailing "$" is dropped with the removed level)
                dir_regex = re.compile('/'.join(dir_regex.pattern.split('/')[:-1]))
                continue
            version = found.group('version')
            use_latest = version == 'latest'
            # The version carried by the directory path takes precedence over the other settings
            ctx.all = None
            ctx.latest = use_latest
            ctx.version = None if use_latest else version
            break
        # Walk through the tree
        for dirpath, _, names in utils.walk(top, downstream=True, followlinks=True):
            # Ignore the content of any location having "/files/" in its path
            if '/files/' in dirpath:
                continue
            for name in names:
                candidate = os.path.join(dirpath, name)
                if not os.path.isfile(candidate):
                    continue
                if re.match(ctx.filter, name) is None:
                    continue
                yield candidate, ctx