Beispiel #1
0
def do_scanning(ctx):
    """
    Tells whether the incoming files have to be scanned for this run.

    A scan is required when it is explicitly requested, when the action is "list",
    when no cached tree file exists, or when one of the controlled arguments differs
    from the value recorded in the cached tree file.

    :param esgprep.drs.context.ProcessingContext ctx: New processing context to evaluate
    :returns: True if file scanning is necessary
    :rtype: *boolean*
    """
    # Explicit rescan request or "list" action: always scan.
    if ctx.rescan or ctx.action == 'list':
        return True
    # No cached tree to reuse: scan.
    if not os.path.isfile(TREE_FILE):
        return True
    # The first record of the cached tree file holds the previous arguments.
    cache_reader = load(TREE_FILE)
    previous = next(cache_reader)
    template = '"{}" argument has changed: "{}" instead of "{}" -- '
    # Ensure that processing context is similar to previous step
    for name in CONTROLLED_ARGS:
        current = getattr(ctx, name)
        if current != previous[name]:
            Print.warning(template.format(name, current, previous[name]) + 'Rescanning files.')
            return True
    # Same context as the cached run: the scan can be skipped.
    return False
Beispiel #2
0
 def __enter__(self):
     """
     Sets up the processing context when entering the ``with`` statement.

     Initializes the checksum type, the project configuration parser, the DRS facets and
     the filename pattern, the DRS tree, the sources collector with its file filters and,
     depending on the options, the progress bar and the threads pool.

     The file scan is disabled (``self.scan = False``) when ``self.rescan`` is false, the
     action is not "list", the cached tree file exists and ``check_args`` accepts the
     arguments recorded in it.

     :returns: The processing context itself
     """
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Check if --commands-file argument specifies existing file
     self.check_existing_commands_file()
     # Warn user about unconsidered hard-coded elements
     # (pattern compiled once, outside the loop)
     facet_pattern = re.compile(r'%\([\w]+\)s')
     for pattern_element in self.cfg.get('directory_format').strip().split("/"):
         if not facet_pattern.match(pattern_element):
             # Trailing space keeps the two concatenated literals from being glued together
             msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                   'are not supported.'.format(pattern_element)
             if self.pbar:
                 print(msg)
             logging.warning(msg)
             # Only the first offending element is reported
             break
     self.facets = self.cfg.get_facets('directory_format')
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Disable file scan if a previous DRS tree has been generated using same context and no "list" action
     if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
         reader = load(TREE_FILE)
         # Built-in next() works with both Python 2 and Python 3 iterators
         old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
     # And exclude hidden files
     self.sources.FileFilter[uuid()] = (r'^\..*$', True)
     # Init progress bar
     if self.pbar:
         nfiles = len(self.sources)
         self.pbar = tqdm(
             desc='Scanning incoming files',
             total=nfiles,
             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
             ncols=100,
             file=sys.stdout)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
Beispiel #3
0
def run(args):
    """
    Main process that:

     * Instantiates processing context,
     * Loads the cached DRS tree when the scan is disabled,
     * Otherwise processes the sources (through the threads pool if enabled),
     * Records the tree context for later usage,
     * Applies command-line action to the whole DRS tree when the results evaluate positively.

    :param ArgumentParser args: The command-line arguments parser

    """
    # Instantiate processing context
    with ProcessingContext(args) as ctx:
        logging.info('==> Scan started')
        if not ctx.scan:
            # Records are read in the order they are stored below:
            # controlled arguments, tree, scan error log, results.
            # Built-in next() works with both Python 2 and Python 3 iterators.
            reader = load(TREE_FILE)
            _ = next(reader)
            ctx.tree = next(reader)
            ctx.scan_err_log = next(reader)
            results = next(reader)
            # Rollback --commands_file value to command-line argument in any case
            ctx.tree.commands_file = ctx.commands_file
            msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
                  'Using cached DRS tree from {}'.format(TREE_FILE)
            if ctx.pbar:
                print(msg)
            logging.warning(msg)
        else:
            if ctx.use_pool:
                processes = ctx.pool.imap(process, ctx.sources)
            else:
                # Lazy generator: same behaviour as the former itertools.imap,
                # without depending on a Python 2 only function
                processes = (process(source) for source in ctx.sources)
            # Process supplied files
            results = list(processes)
        # Close progress bar
        if ctx.pbar:
            ctx.pbar.close()
        # Get number of files scanned (including skipped files)
        ctx.scan_files = len(results)
        # Get number of scan errors
        ctx.scan_errors = results.count(None)
        # Backup tree context for later usage with other command lines
        store(TREE_FILE, data=[{key: getattr(ctx, key) for key in CONTROLLED_ARGS},
                               ctx.tree,
                               ctx.scan_err_log,
                               results])
        logging.warning('DRS tree recorded for next usage onto {}.'.format(TREE_FILE))
        # Evaluates the scan results to trigger the DRS tree action
        if evaluate(results):
            # Check upgrade uniqueness
            ctx.tree.check_uniqueness(ctx.checksum_type)
            # Apply tree action
            ctx.tree.get_display_lengths()
            getattr(ctx.tree, ctx.action)()
Beispiel #4
0
def run(args):
    """
    Main process that:

     * Instantiates processing context,
     * Loads previous program instance when no scan is required,
     * Otherwise parallelizes file processing with a processes pool (if enabled) and builds the DRS tree,
     * Records the tree context for later usage,
     * Apply command-line action to the whole DRS tree,
     * Exits with the number of scan errors as return code when there are any.

    :param ArgumentParser args: The command-line arguments parser

    """
    # Init global variable
    global tree
    # Instantiate processing context
    with ProcessingContext(args) as ctx:
        # Init DRS tree
        tree = DRSTree(ctx.root, ctx.version, ctx.mode, ctx.commands_file)
        # Init process context
        cctx = {name: getattr(ctx, name) for name in PROCESS_VARS}
        # Disable file scan if a previous DRS tree has been generated using same context and no "list" action
        if do_scanning(ctx):
            # Explicit handle on the optional pool instead of probing locals()
            pool = None
            if ctx.use_pool:
                # Init processes pool
                pool = Pool(processes=ctx.processes,
                            initializer=initializer,
                            initargs=(list(cctx.keys()), list(cctx.values())))
                processes = pool.imap(process, ctx.sources)
            else:
                initializer(list(cctx.keys()), list(cctx.values()))
                processes = map(process, ctx.sources)
            # Process supplied sources
            handlers = list(processes)
            # Close pool of workers if exists
            if pool is not None:
                pool.close()
                pool.join()
            Print.progress('\n')
            # Build DRS tree
            # Reset the shared progress counter before the tree building pass
            cctx['progress'].value = 0
            initializer(list(cctx.keys()), list(cctx.values()))
            # NOTE(review): handlers equal to None are dropped here, so they are not part
            # of ``results`` counted below -- confirm this matches the intended
            # "including errors/skipped files" count.
            handlers = [h for h in handlers if h is not None]
            results = [tree_builder(h) for h in handlers]
            Print.progress('\n')
        else:
            reader = load(TREE_FILE)
            msg = 'Skip incoming files scan (use "--rescan" to force it) -- '
            msg += 'Using cached DRS tree from {}'.format(TREE_FILE)
            Print.warning(msg)
            # Records are read in the order they are stored below:
            # controlled arguments, tree, handlers, results.
            _ = next(reader)
            tree = next(reader)
            handlers = next(reader)
            results = next(reader)
        # Flush buffer
        Print.flush()
        # Rollback --commands-file value to command-line argument in any case
        tree.commands_file = ctx.commands_file
        # Get number of files scanned (including errors/skipped files)
        ctx.scan_data = len(results)
        # Get number of scan errors
        ctx.scan_errors = results.count(None)
        # Backup tree context for later usage with other command lines
        store(
            TREE_FILE,
            data=[{key: getattr(ctx, key) for key in CONTROLLED_ARGS},
                  tree, handlers, results])
        Print.info(TAGS.INFO +
                   'DRS tree recorded for next usage onto {}.'.format(
                       COLORS.HEADER(TREE_FILE)))
        # Evaluates the scan results to trigger the DRS tree action
        if evaluate(results):
            # Check upgrade uniqueness
            tree.check_uniqueness()
            # Apply tree action
            tree.get_display_lengths()
            getattr(tree, ctx.action)()
    # Evaluate errors and exit with appropriated return code
    if ctx.scan_errors > 0:
        sys.exit(ctx.scan_errors)
Beispiel #5
0
 def __enter__(self):
     """
     Sets up the processing context when entering the ``with`` statement.

     Initializes the checksum type, the project configuration parser, the DRS facets
     (including generated keys for hard-coded directory elements), the filename pattern,
     the DRS tree, the sources collector with its file filters and, depending on the
     options, the progress bar and the threads pool.

     The file scan is disabled (``self.scan = False``) when ``self.rescan`` is false, the
     action is not "list", the cached tree file exists and ``check_args`` accepts the
     arguments recorded in it.

     :returns: The processing context itself
     :raises NoVersionPattern: If "version" is not part of the directory format facets
     """
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Check if --commands-file argument specifies existing file
     self.check_existing_commands_file()
     # Get DRS facets
     self.facets = self.cfg.get_facets('directory_format')
     # Raise error when %(version)s is not part of the final directory format
     if 'version' not in self.facets:
         raise NoVersionPattern(self.cfg.get('directory_format'),
                                self.facets)
     # Consider hard-coded elements in directory format
     # (pattern compiled once, outside the loop)
     facet_pattern = re.compile(r'%\(([\w]+)\)s')
     # Position in self.facets of the last directory element handled so far;
     # -1 means "nothing yet", so a leading hard-coded element is inserted first.
     idx = -1
     for pattern_element in self.cfg.get('directory_format').strip().split("/"):
         match = facet_pattern.match(pattern_element)
         if match:
             # If pattern is %(...)s
             # Get its index in the list of facets
             idx = self.facets.index(match.group(1))
         else:
             # If pattern is not %(...)s
             # Generate a uuid()
             key = str(uuid())
             # Insert hard-coded string in self.facets to be part of DRS path,
             # right after the previous element. The index is advanced so that
             # consecutive hard-coded elements keep their original order.
             idx += 1
             self.facets.insert(idx, key)
             # Set the value using --set-value
             self.set_values[key] = pattern_element
             # Add the uuid to the ignored keys
             IGNORED_KEYS.append(key)
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Disable file scan if a previous DRS tree has been generated using same context and no "list" action
     if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
         reader = load(TREE_FILE)
         # Built-in next() works with both Python 2 and Python 3 iterators
         old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter.add(regex=r'^.*\.nc$')
     # And exclude hidden files
     self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
     # Init progress bar
     if self.scan:
         nfiles = len(self.sources)
         # No progress bar when there is nothing to scan
         if self.pbar and nfiles:
             self.pbar = tqdm(
                 desc='Scanning incoming files',
                 total=nfiles,
                 bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                 ncols=100,
                 file=sys.stdout)
     else:
         msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
               'Using cached DRS tree from {}'.format(TREE_FILE)
         if self.pbar:
             print(msg)
         logging.warning(msg)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self