def do_scanning(ctx):
    """
    Tells whether the incoming files have to be scanned for the given processing context.

    A scan is required when a rescan is requested, when the action is ``list``,
    when no cached tree file exists, or when one of the controlled arguments
    differs from the value recorded in the cached tree file.

    :param esgprep.drs.context.ProcessingContext ctx: New processing context to evaluate
    :returns: True if file scanning is necessary
    :rtype: *boolean*

    """
    # An explicit rescan request or the "list" action always triggers a scan
    if ctx.rescan or ctx.action == 'list':
        return True
    # Without a cached tree file there is nothing to reuse
    if not os.path.isfile(TREE_FILE):
        return True
    # The first record read from the tree file is compared against the current arguments
    previous_args = next(load(TREE_FILE))
    for name in CONTROLLED_ARGS:
        current, previous = getattr(ctx, name), previous_args[name]
        if current != previous:
            # Any difference invalidates the cached tree
            Print.warning(
                '"{}" argument has changed: "{}" instead of "{}" -- '.format(name, current, previous)
                + 'Rescanning files.')
            return True
    # Same context as the previous run: the cached tree can be reused
    return False
def __enter__(self):
    """
    Prepares the processing context when entering the ``with`` statement.

    Sets up the checksum type, the project configuration parser, the DRS facets and
    filename pattern, the DRS tree, the source collector with its file filters and,
    when enabled, the progress bar and the threads pool. The file scan is disabled
    (``self.scan = False``) when a cached tree file exists and ``check_args`` accepts
    the arguments it recorded.

    :returns: The context instance itself
    """
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies existing file
    self.check_existing_commands_file()
    # Warn user about unconsidered hard-coded elements.
    # The pattern is compiled once; the warning is emitted for the first offender only.
    facet_pattern = re.compile(r'%\([\w]+\)s')
    for pattern_element in self.cfg.get('directory_format').strip().split('/'):
        if not facet_pattern.match(pattern_element):
            msg = ('Hard-coded DRS elements (as "{}") in "directory_format" '
                   'are not supported.').format(pattern_element)
            if self.pbar:
                print(msg)
            logging.warning(msg)
            break
    self.facets = self.cfg.get_facets('directory_format')
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated using the same
    # context and the action is not "list"
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        # next() works with both Python 2 and Python 3 iterators
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector (the spinner is disabled when no progress bar is wanted)
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
    # And exclude hidden files
    self.sources.FileFilter[uuid()] = (r'^\..*$', True)
    # Init progress bar
    if self.pbar:
        nfiles = len(self.sources)
        self.pbar = tqdm(desc='Scanning incoming files',
                         total=nfiles,
                         bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                         ncols=100,
                         file=sys.stdout)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self
def run(args):
    """
    Main process that:

     * Instantiates the processing context,
     * Loads the cached DRS tree when the scan is disabled,
     * Otherwise processes the incoming files (through the threads pool if enabled),
     * Records the DRS tree for the next run,
     * Applies the command-line action to the whole DRS tree.

    :param ArgumentParser args: The command-line arguments parser

    """
    # Instantiate processing context
    with ProcessingContext(args) as ctx:
        logging.info('==> Scan started')
        if not ctx.scan:
            # Reload the records of the previous run, in the order they were stored.
            # next() is used instead of the Python 2 only ".next()" method.
            reader = load(TREE_FILE)
            _ = next(reader)
            ctx.tree = next(reader)
            ctx.scan_err_log = next(reader)
            results = next(reader)
            # Rollback --commands_file value to command-line argument in any case
            ctx.tree.commands_file = ctx.commands_file
            msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
                  'Using cached DRS tree from {}'.format(TREE_FILE)
            if ctx.pbar:
                print(msg)
            logging.warning(msg)
        else:
            if ctx.use_pool:
                processes = ctx.pool.imap(process, ctx.sources)
            else:
                # Lazy generator: portable replacement for itertools.imap
                processes = (process(source) for source in ctx.sources)
            # Process supplied files
            results = list(processes)
            # Close progress bar
            if ctx.pbar:
                ctx.pbar.close()
        # Get number of files scanned (including skipped files)
        ctx.scan_files = len(results)
        # Get number of scan errors (a None result is counted as an error)
        ctx.scan_errors = results.count(None)
        # Backup tree context for later usage with other command lines
        store(TREE_FILE, data=[{key: getattr(ctx, key) for key in CONTROLLED_ARGS},
                               ctx.tree,
                               ctx.scan_err_log,
                               results])
        logging.warning('DRS tree recorded for next usage onto {}.'.format(TREE_FILE))
        # Evaluates the scan results to trigger the DRS tree action
        if evaluate(results):
            # Check upgrade uniqueness
            ctx.tree.check_uniqueness(ctx.checksum_type)
            # Apply tree action
            ctx.tree.get_display_lengths()
            getattr(ctx.tree, ctx.action)()
def run(args):
    """
    Main process that:

     * Instantiates the processing context,
     * Scans the incoming files (through a processes pool if enabled) and builds the
       DRS tree, or reloads the cached DRS tree when no scan is needed,
     * Records the DRS tree for the next run,
     * Applies the command-line action to the whole DRS tree,
     * Exits with the number of scan errors as return code when there are any.

    :param ArgumentParser args: The command-line arguments parser

    """
    # The DRS tree is shared at module level
    global tree
    # Instantiate processing context
    with ProcessingContext(args) as ctx:
        # Init DRS tree
        tree = DRSTree(ctx.root, ctx.version, ctx.mode, ctx.commands_file)
        # Init process context
        cctx = {name: getattr(ctx, name) for name in PROCESS_VARS}
        # Disable file scan if a previous DRS tree has been generated using the same
        # context and the action is not "list"
        if do_scanning(ctx):
            # Explicit sentinel instead of probing locals() for the pool
            pool = None
            if ctx.use_pool:
                # Init processes pool
                pool = Pool(processes=ctx.processes,
                            initializer=initializer,
                            initargs=(list(cctx.keys()), list(cctx.values())))
                processes = pool.imap(process, ctx.sources)
            else:
                initializer(list(cctx.keys()), list(cctx.values()))
                processes = map(process, ctx.sources)
            # Process supplied sources
            handlers = list(processes)
            # Close pool of workers if exists
            if pool is not None:
                pool.close()
                pool.join()
            Print.progress('\n')
            # Build DRS tree: reset the progress counter and re-initialize the
            # process context before the second pass
            cctx['progress'].value = 0
            initializer(list(cctx.keys()), list(cctx.values()))
            # Sources that returned None are left out of the tree
            handlers = [h for h in handlers if h is not None]
            results = [tree_builder(handler) for handler in handlers]
            Print.progress('\n')
        else:
            reader = load(TREE_FILE)
            msg = 'Skip incoming files scan (use "--rescan" to force it) -- '
            msg += 'Using cached DRS tree from {}'.format(TREE_FILE)
            Print.warning(msg)
            # Reload the records of the previous run, in the order they were stored
            _ = next(reader)
            tree = next(reader)
            handlers = next(reader)
            results = next(reader)
        # Flush buffer
        Print.flush()
        # Rollback --commands-file value to command-line argument in any case
        tree.commands_file = ctx.commands_file
        # Get number of files scanned (including errors/skipped files)
        ctx.scan_data = len(results)
        # Get number of scan errors (a None result is counted as an error)
        ctx.scan_errors = results.count(None)
        # Backup tree context for later usage with other command lines
        store(TREE_FILE, data=[{key: getattr(ctx, key) for key in CONTROLLED_ARGS},
                               tree,
                               handlers,
                               results])
        Print.info(TAGS.INFO + 'DRS tree recorded for next usage onto {}.'.format(
            COLORS.HEADER(TREE_FILE)))
        # Evaluates the scan results to trigger the DRS tree action
        if evaluate(results):
            # Check upgrade uniqueness
            tree.check_uniqueness()
            # Apply tree action
            tree.get_display_lengths()
            getattr(tree, ctx.action)()
    # Evaluate errors and exit with appropriated return code
    if ctx.scan_errors > 0:
        sys.exit(ctx.scan_errors)
def __enter__(self):
    """
    Prepares the processing context when entering the ``with`` statement.

    Sets up the checksum type, the project configuration parser, the DRS facets
    (including hard-coded directory elements), the filename pattern, the DRS tree,
    the source collector with its file filters and, when enabled, the progress bar
    and the threads pool. The file scan is disabled (``self.scan = False``) when a
    cached tree file exists and ``check_args`` accepts the arguments it recorded.

    :returns: The context instance itself
    :raises NoVersionPattern: If "version" is not one of the "directory_format" facets

    """
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies existing file
    self.check_existing_commands_file()
    # Get DRS facets
    self.facets = self.cfg.get_facets('directory_format')
    # Raise error when %(version)s is not part of the final directory format
    if 'version' not in self.facets:
        raise NoVersionPattern(self.cfg.get('directory_format'), self.facets)
    # Consider hard-coded elements in directory format
    facet_pattern = re.compile(r'%\(([\w]+)\)s')
    # Position in self.facets of the last directory element handled so far.
    # NOTE(review): with idx starting at 0, a hard-coded element that comes before any
    # %(...)s element is inserted after the first facet -- confirm this is intended.
    idx = 0
    for pattern_element in self.cfg.get('directory_format').strip().split('/'):
        match = facet_pattern.match(pattern_element)
        if match:
            # Pattern is %(...)s: remember its index in the list of facets
            idx = self.facets.index(match.group(1))
            continue
        # Pattern is not %(...)s: generate a uuid() as facet key
        key = str(uuid())
        # Insert hard-coded string in self.facets to be part of DRS path, right after
        # the previous element. idx is advanced so that consecutive hard-coded
        # elements keep the order they have in the directory format.
        idx += 1
        self.facets.insert(idx, key)
        # Set the value using --set-value
        self.set_values[key] = pattern_element
        # Add the uuid to the ignored keys
        IGNORED_KEYS.append(key)
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated using the same
    # context and the action is not "list"
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        # next() works with both Python 2 and Python 3 iterators
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector (the spinner is disabled when no progress bar is wanted)
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter.add(regex=r'^.*\.nc$')
    # And exclude hidden files
    self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
    # Init progress bar
    if self.scan:
        nfiles = len(self.sources)
        # No progress bar is created when there is nothing to scan
        if self.pbar and nfiles:
            self.pbar = tqdm(desc='Scanning incoming files',
                             total=nfiles,
                             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                             ncols=100,
                             file=sys.stdout)
    else:
        msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
              'Using cached DRS tree from {}'.format(TREE_FILE)
        if self.pbar:
            print(msg)
        logging.warning(msg)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self