Ejemplo n.º 1
0
 def __enter__(self):
     """
     Set up the processing context and return it.

     In order, this resolves the checksum type, loads the project
     configuration section, checks the commands file, warns once if
     ``directory_format`` contains an element that is not a ``%(facet)s``
     placeholder, builds the DRS tree, disables the file scan when a
     previous tree file matches the current arguments, and initialises
     the file collector, the progress bar and the thread pool.

     :returns: The context itself

     """
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Check if --commands-file argument specifies existing file
     self.check_existing_commands_file()
     # Warn user about unconsidered hard-coded elements.
     # The pattern is compiled once instead of on every iteration.
     facet_regex = re.compile(r'%\([\w]+\)s')
     directory_format = self.cfg.get('directory_format').strip()
     for pattern_element in directory_format.split('/'):
         if not facet_regex.match(pattern_element):
             # The two literals are concatenated before ``format`` is
             # applied; the trailing space keeps the words separated.
             msg = ('Hard-coded DRS elements (as "{}") in "directory_format" '
                    'are not supported.').format(pattern_element)
             if self.pbar:
                 print(msg)
             logging.warning(msg)
             # Only the first offending element is reported
             break
     self.facets = self.cfg.get_facets('directory_format')
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Disable file scan if a previous DRS tree has been generated using
     # the same context and the action is not "list"
     if (not self.rescan and self.action != 'list'
             and os.path.isfile(TREE_FILE)):
         reader = load(TREE_FILE)
         # ``next()`` works on both Python 2 and 3 iterators, whereas the
         # ``.next()`` method only exists on Python 2.
         old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector (the spinner is disabled without progress bar)
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
     # And exclude hidden files
     self.sources.FileFilter[uuid()] = (r'^\..*$', True)
     # Init progress bar: the truthy ``self.pbar`` flag is replaced by
     # the tqdm instance itself
     if self.pbar:
         nfiles = len(self.sources)
         self.pbar = tqdm(
             desc='Scanning incoming files',
             total=nfiles,
             bar_format=
             '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
             ncols=100,
             file=sys.stdout)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
Ejemplo n.º 2
0
 def __enter__(self):
     """
     Set up the processing context and return it.

     After the parent ``__enter__``, this collects the DRS facet keys
     from the named groups of the translated ``directory_format`` and
     ``dataset_id`` patterns (minus ``IGNORED_KEYS``), then selects the
     source collector and the matching pattern from whichever of
     ``directory``, ``incoming``, ``dataset_id`` or ``dataset_list`` is
     set, tested in that order.

     :returns: The context itself

     """
     super(ProcessingContext, self).__enter__()
     # Get the DRS facet keys from the named groups of both patterns
     directory_regex = re.compile(
         self.cfg.translate('directory_format', add_ending_filename=True))
     dataset_regex = re.compile(self.cfg.translate('dataset_id'))
     facets = set(directory_regex.groupindex)
     facets.update(dataset_regex.groupindex)
     # ``self.facets`` is a set: duplicates and ignored keys are dropped
     self.facets = facets.difference(IGNORED_KEYS)
     # Init data collector
     if self.directory:
         # The source is a list of directories
         self.source_type = 'file'
         self.sources = PathCollector(sources=self.directory)
         # Init file filter
         for regex, inclusive in self.file_filter:
             self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
         # Init dir filter
         self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
         # Full paths are matched against the directory format followed
         # by the filename
         self.pattern = self.cfg.translate('directory_format',
                                           add_ending_filename=True)
     elif self.incoming:
         # The source is given through ``self.incoming`` and is handled
         # as files
         self.source_type = 'file'
         self.sources = Collector(sources=self.incoming)
         # Init file filter
         for regex, inclusive in self.file_filter:
             self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
         # Init dir filter
         self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
         # Translate filename format
         self.pattern = self.cfg.translate('filename_format')
     elif self.dataset_id:
         # The source is a dataset ID (potentially from stdin)
         self.source_type = 'dataset'
         self.sources = DatasetCollector(sources=[self.dataset_id],
                                         versioned=False)
         # Translate dataset_id format
         self.pattern = self.cfg.translate('dataset_id')
     else:
         # The source is a list of files (i.e., several dataset lists)
         # Has to be tested at the end because args.dataset_list never None,
         # see __init__ comment.
         self.source_type = 'dataset'
         # Keep non-blank lines only, stripped of surrounding whitespace
         stripped = (line.strip() for line in self.dataset_list.readlines())
         self.sources = DatasetCollector(
             sources=[line for line in stripped if line],
             versioned=False)
         # Translate dataset_id format
         self.pattern = self.cfg.translate('dataset_id')
     # Get number of sources
     self.nbsources = len(self.sources)
     return self
Ejemplo n.º 3
0
 def __enter__(self):
     """
     Set up the processing context and return it.

     After the parent ``__enter__``, this reads the DRS facets from
     ``directory_format``, checks the commands file, registers every
     hard-coded path element as an extra facet with a fixed value, builds
     the DRS tree and initialises the file collector and its filters.

     :returns: The context itself
     :raises NoVersionPattern: If ``version`` is not a facet of the \
     directory format

     """
     super(ProcessingContext, self).__enter__()
     # Get the DRS facet keys from pattern
     self.facets = self.cfg.get_facets('directory_format')
     # Check if --commands-file argument specifies existing file
     self.check_existing_commands_file()
     # Raise error when %(version)s is not part of the final directory format
     if 'version' not in self.facets:
         raise NoVersionPattern(self.cfg.get('directory_format'),
                                self.facets)
     # Consider hard-coded elements in directory format
     facet_regex = re.compile(r'%\(([\w]+)\)s')
     directory_format = self.cfg.get('directory_format').strip()
     idx = 0
     for pattern_element in directory_format.split('/'):
         match = facet_regex.match(pattern_element)
         if match:
             # If pattern is %(...)s, get its index in the list of facets
             idx = self.facets.index(match.group(1))
         else:
             # If pattern is not %(...)s, generate a uuid()
             key = str(uuid())
             # Insert hard-coded string in self.facets to be part of DRS
             # path. The index is advanced so that consecutive hard-coded
             # elements keep their directory_format order instead of
             # being inserted in front of each other.
             idx += 1
             self.facets.insert(idx, key)
             # Set the value using --set-value
             self.set_values[key] = pattern_element
             # Add the uuid to the ignored keys
             IGNORED_KEYS.append(key)
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Init data collector
     self.sources = Collector(sources=self.directory)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter.add(regex=r'^.*\.nc$')
     # And exclude hidden files
     self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
     # Get number of sources
     self.nbsources = len(self.sources)
     return self
Ejemplo n.º 4
0
 def __enter__(self):
     """
     Set up the processing context and return it.

     In order, this resolves the checksum type, loads the project
     configuration section, checks the commands file, reads the DRS
     facets, registers every hard-coded path element as an extra facet
     with a fixed value, builds the DRS tree, disables the file scan when
     a previous tree file matches the current arguments, and initialises
     the file collector, the progress bar and the thread pool.

     :returns: The context itself
     :raises NoVersionPattern: If ``version`` is not a facet of the \
     directory format

     """
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Check if --commands-file argument specifies existing file
     self.check_existing_commands_file()
     # Get DRS facets
     self.facets = self.cfg.get_facets('directory_format')
     # Raise error when %(version)s is not part of the final directory format
     if 'version' not in self.facets:
         raise NoVersionPattern(self.cfg.get('directory_format'),
                                self.facets)
     # Consider hard-coded elements in directory format
     facet_regex = re.compile(r'%\(([\w]+)\)s')
     directory_format = self.cfg.get('directory_format').strip()
     idx = 0
     for pattern_element in directory_format.split('/'):
         match = facet_regex.match(pattern_element)
         if match:
             # If pattern is %(...)s
             # Get its index in the list of facets
             idx = self.facets.index(match.group(1))
         else:
             # If pattern is not %(...)s
             # Generate a uuid()
             key = str(uuid())
             # Insert hard-coded string in self.facets to be part of DRS
             # path. The index is advanced so that consecutive hard-coded
             # elements keep their directory_format order instead of
             # being inserted in front of each other.
             idx += 1
             self.facets.insert(idx, key)
             # Set the value using --set-value
             self.set_values[key] = pattern_element
             # Add the uuid to the ignored keys
             IGNORED_KEYS.append(key)
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Disable file scan if a previous DRS tree has been generated using
     # the same context and the action is not "list"
     if (not self.rescan and self.action != 'list'
             and os.path.isfile(TREE_FILE)):
         reader = load(TREE_FILE)
         # ``next()`` works on both Python 2 and 3 iterators, whereas the
         # ``.next()`` method only exists on Python 2.
         old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector (the spinner is disabled without progress bar)
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter.add(regex=r'^.*\.nc$')
     # And exclude hidden files
     self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
     # Init progress bar
     if self.scan:
         nfiles = len(self.sources)
         # The truthy ``self.pbar`` flag is replaced by the tqdm instance,
         # but only when there is at least one file to scan
         if self.pbar and nfiles:
             self.pbar = tqdm(
                 desc='Scanning incoming files',
                 total=nfiles,
                 bar_format=
                 '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                 ncols=100,
                 file=sys.stdout)
     else:
         msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
               'Using cached DRS tree from {}'.format(TREE_FILE)
         if self.pbar:
             print(msg)
         logging.warning(msg)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self