def __enter__(self):
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Init data collector
    if self.directory:
        # The source is a list of directories
        # Instantiate file collector to walk through the tree
        self.source_type = 'files'
        if self.pbar:
            self.sources = PathCollector(sources=self.directory)
        else:
            self.sources = PathCollector(sources=self.directory, spinner=False)
        # Init file filter
        for file_filter in self.file_filter:
            self.sources.FileFilter[uuid()] = file_filter
        # Init dir filter
        self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
        self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
    else:
        # The source is a list of files (i.e., several dataset lists)
        # Instantiate dataset collector to parse the files
        self.source_type = 'datasets'
        if self.pbar:
            self.sources = DatasetCollector(self.dataset_list)
        else:
            self.sources = DatasetCollector(self.dataset_list, spinner=False)
        self.pattern = self.cfg.translate('dataset_id')
    # Get the facet keys from pattern
    self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
    # Init progress bar
    if self.pbar:
        self.sources = as_pbar(self.sources,
                               desc='Harvesting facets values from source',
                               units=self.source_type)
    return self

def get_mapfile_drs(self):
    try:
        _cfg = SectionParser(section='config:{}'.format(self.project),
                             directory=self.config_dir)
        mapfile_drs = _cfg.get('mapfile_drs')
        _cfg.reset()
    except (NoConfigOption, NoConfigSection):
        mapfile_drs = None
    return mapfile_drs

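# Usage note (hedged): SectionParser wraps INI parsing in esgprep, so the
# optional-option pattern above is plain ConfigParser behavior underneath.
# A self-contained sketch with the Python 3 stdlib and an invented
# "config:cmip6" section (section/option names are illustrative only):
import configparser  # named ConfigParser in Python 2

_parser = configparser.ConfigParser()
_parser.read_string(u'[config:cmip6]\nmapfile_drs = {dataset_id}\n')
try:
    mapfile_drs = _parser.get('config:cmip6', 'mapfile_drs')
except (configparser.NoSectionError, configparser.NoOptionError):
    mapfile_drs = None
print(mapfile_drs)  # {dataset_id}
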
def __enter__(self):
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies an existing file
    self.check_existing_commands_file()
    # Warn user about unconsidered hard-coded elements
    for pattern_element in self.cfg.get('directory_format').strip().split("/"):
        if not re.match(re.compile(r'%\([\w]+\)s'), pattern_element):
            msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                  'are not supported.'.format(pattern_element)
            if self.pbar:
                print(msg)
            logging.warning(msg)
            break
    self.facets = self.cfg.get_facets('directory_format')
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated with the same context and no "list" action
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
    # And exclude hidden files
    self.sources.FileFilter[uuid()] = (r'^\..*$', True)
    # Init progress bar
    if self.pbar:
        nfiles = len(self.sources)
        self.pbar = tqdm(desc='Scanning incoming files',
                         total=nfiles,
                         bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                         ncols=100,
                         file=sys.stdout)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self

def __enter__(self):
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Init data collector
    if self.directory:
        # The source is a list of directories
        # Instantiate file collector to walk through the tree
        self.source_type = 'files'
        if self.pbar:
            self.sources = PathCollector(sources=self.directory)
        else:
            self.sources = PathCollector(sources=self.directory, spinner=False)
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        self.pattern = self.cfg.translate('directory_format', add_ending_filename=True)
    else:
        # The source is a list of files (i.e., several dataset lists)
        # Instantiate dataset collector to parse the files
        self.source_type = 'datasets'
        sources = [x.strip() for x in self.dataset_list.readlines() if x.strip()]
        if self.pbar:
            self.sources = DatasetCollector(source=sources, versioned=False)
        else:
            self.sources = DatasetCollector(source=sources, spinner=False, versioned=False)
        self.pattern = self.cfg.translate('dataset_id')
    # Get the facet keys from pattern
    self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
    # Init progress bar
    nfiles = len(self.sources)
    if self.pbar and nfiles:
        self.sources = tqdm(self.sources,
                            desc='Harvesting facets values from data',
                            total=nfiles,
                            bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} ' + self.source_type,
                            ncols=100,
                            file=sys.stdout)
    return self

def __enter__(self):
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    self.facets = self.cfg.get_facets('dataset_id')
    self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
    # Get mapfile DRS if set in the configuration file
    try:
        self.mapfile_drs = self.cfg.get('mapfile_drs')
    except NoConfigOption:
        self.mapfile_drs = None
    # Init data collector
    if self.pbar:
        self.sources = VersionedPathCollector(sources=self.directory,
                                              data=self,
                                              dir_format=self.cfg.translate('directory_format'))
    else:
        self.sources = VersionedPathCollector(sources=self.directory,
                                              data=self,
                                              spinner=False,
                                              dir_format=self.cfg.translate('directory_format'))
    # Init file filter
    for file_filter in self.file_filter:
        self.sources.FileFilter[uuid()] = file_filter
    # Init dir filter
    self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
    if self.all:
        # Pick up all encountered versions by adding "/latest" exclusion
        self.sources.PathFilter['version_filter'] = ('/latest', True)
    elif self.version:
        # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
        # If --latest-symlink, --version is set to "latest"
        self.sources.PathFilter['version_filter'] = '/{}'.format(self.version)
    # Init progress bar
    if self.pbar:
        nfiles = len(self.sources)
        self.pbar = tqdm(desc='Mapfile(s) generation',
                         total=nfiles,
                         bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                         ncols=100,
                         file=sys.stdout)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self

def is_simulation_completed(card_path):
    """
    Returns True if the simulation is completed.

    :param str card_path: Directory including run.card
    :returns: True if the simulation is completed
    :rtype: *boolean*

    """
    # Check cards exist
    if RUN_CARD not in os.listdir(card_path):
        raise NoRunCardFound(card_path)
    else:
        run_card = os.path.join(card_path, RUN_CARD)
    # Extract info from cards
    config = SectionParser('Configuration')
    config.read(run_card)
    return config.get('periodstate').strip('"') == 'Completed'

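# Minimal sketch of the completion check with the standard library only
# (SectionParser here reads an IPSL run.card; the sample payload below is an
# assumption about its layout, not a verified run.card):
import configparser  # named ConfigParser in Python 2

_cfg = configparser.ConfigParser()
_cfg.read_string(u'[Configuration]\nPeriodState= "Completed"\n')
print(_cfg.get('Configuration', 'PeriodState').strip('"') == 'Completed')  # True
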
def get_checksum_type(self):
    """
    Gets the checksum type to use.
    Be careful with Exception constants when reading two different sections.

    :returns: The checksum type
    :rtype: *str*

    """
    _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
    if _cfg.has_option('checksum', section='DEFAULT'):
        checksum_type = _cfg.get_options_from_table('checksum')[0][1].lower()
    else:
        # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
        checksum_type = 'sha256'
    if checksum_type not in checksum_types:
        raise InvalidChecksumType(checksum_type)
    return checksum_type

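# Once the checksum type is resolved, checksumming itself is typically done
# with hashlib; a hedged sketch (the mapping from the configured name to a
# hashlib constructor is an assumption, not the esgprep client):
import hashlib

checksum_type = 'sha256'  # as returned by get_checksum_type()
h = hashlib.new(checksum_type)
h.update(b'netCDF file bytes would go here')
print(h.hexdigest()[:16])  # first 16 hex digits of the checksum
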
def __enter__(self):
    super(MultiprocessingContext, self).__enter__()
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Get mapfile DRS
    self.mapfile_drs = self.get_mapfile_drs()
    # Configuration parser to be loaded at the end
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    return self

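# The method above relies on cooperative context managers: the subclass
# extends the parent's __enter__ before doing its own setup. A standalone
# sketch of that pattern (Base/Child are illustrative names):
class Base(object):
    def __enter__(self):
        self.base_ready = True
        return self

    def __exit__(self, *exc):
        pass


class Child(Base):
    def __enter__(self):
        super(Child, self).__enter__()  # parent setup runs first
        self.child_ready = True
        return self


with Child() as ctx:
    print(ctx.base_ready, ctx.child_ready)  # True True
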
def __enter__(self):
    # Init file filter
    for regex, inclusive in self.file_filter:
        self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
    # Exclude fixed frequency in any case
    self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)',
                                inclusive=False)
    # Init dir filter
    self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
    # Set driving time properties
    tinit = TimeInit(ref=self.sources.first(), tunits_default=self.tunits_default)
    if not self.ref_calendar:
        self.ref_calendar = tinit.calendar
    if not self.ref_units:
        self.ref_units = tinit.tunits
    # Get project id
    if not self.project:
        self.project = get_project(self.sources.first())
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    self.pattern = self.cfg.translate('filename_format')
    return self

def yield_xml_from_card(card_path):
    """
    Yields XML path from run.card and config.card attributes.

    :param str card_path: Directory including run.card and config.card
    :returns: The XML paths to use
    :rtype: *iter*

    """
    # Check cards exist
    if RUN_CARD not in os.listdir(card_path):
        raise NoRunCardFound(card_path)
    else:
        run_card = os.path.join(card_path, RUN_CARD)
    if CONF_CARD not in os.listdir(card_path):
        raise NoConfigCardFound(card_path)
    else:
        conf_card = os.path.join(card_path, CONF_CARD)
    # Extract config info from config.card
    config = SectionParser('UserChoices')
    config.read(conf_card)
    xml_attrs = dict()
    xml_attrs['root'] = FILEDEF_ROOT
    xml_attrs['longname'] = config.get('longname').strip('"')
    xml_attrs['experimentname'] = config.get('experimentname').strip('"')
    if config.has_option('modelname'):
        xml_attrs['modelname'] = config.get('modelname').strip('"')
    else:
        xml_attrs['modelname'] = 'IPSL-CM6A-LR'
    xml_attrs['member'] = config.get('member').strip('"')
    # Extract first and last simulated years from run.card
    with open(run_card, 'r') as f:
        lines = f.read().split('\n')
    # Get run table without header
    lines = [line for line in lines if line.count('|') == 8][1:]
    year_start = int(lines[0].split()[3][:4])
    year_end = int(lines[-1].split()[5][:4])
    for year in range(year_start, year_end + 1):
        xml_attrs['year'] = str(year)
        yield FILEDEF_DIRECTORY_FORMAT.format(**xml_attrs)

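# The run-table parsing above keys on lines containing exactly eight '|'
# separators, then slices years out of fixed token positions. A standalone
# sketch with invented rows (the real run.card column layout is assumed):
sample = '\n'.join([
    '| Period | StartDate | EndDate  | State     | RealTime | CpuTime  | n |',
    '| 1      | 18500101  | 18501231 | Completed | 01:23:45 | 02:00:00 | - |',
    '| 2      | 18510101  | 18511231 | Completed | 01:22:10 | 01:58:42 | - |',
])
rows = [line for line in sample.split('\n') if line.count('|') == 8][1:]
year_start = int(rows[0].split()[3][:4])
year_end = int(rows[-1].split()[5][:4])
print(list(range(year_start, year_end + 1)))  # [1850, 1851]
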
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        self.pbar = args.pbar
        self.project = args.project
        self.config_dir = args.i
        self.directory = args.directory
        self.dataset_list = args.dataset_list
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, False) for f in args.include_file])
        else:
            # Default includes netCDF files only
            self.file_filter.append((r'^.*\.nc$', False))
        if args.exclude_file:
            # Default excludes hidden files
            self.file_filter.extend([(f, True) for f in args.exclude_file])
        else:
            self.file_filter.append((r'^\..*$', True))
        self.scan_errors = 0
        self.any_undeclared = False

    def __enter__(self):
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        # Init data collector
        if self.directory:
            # The source is a list of directories
            # Instantiate file collector to walk through the tree
            self.source_type = 'files'
            if self.pbar:
                self.sources = PathCollector(sources=self.directory)
            else:
                self.sources = PathCollector(sources=self.directory, spinner=False)
            # Init file filter
            for file_filter in self.file_filter:
                self.sources.FileFilter[uuid()] = file_filter
            # Init dir filter
            self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
            self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
        else:
            # The source is a list of files (i.e., several dataset lists)
            # Instantiate dataset collector to parse the files
            self.source_type = 'datasets'
            if self.pbar:
                self.sources = DatasetCollector(self.dataset_list)
            else:
                self.sources = DatasetCollector(self.dataset_list, spinner=False)
            self.pattern = self.cfg.translate('dataset_id')
        # Get the facet keys from pattern
        self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
        # Init progress bar
        if self.pbar:
            self.sources = as_pbar(self.sources,
                                   desc='Harvesting facets values from source',
                                   units=self.source_type)
        return self

    def __exit__(self, *exc):
        # Default is sys.exit(0)
        if self.scan_errors > 0:
            print('{}: {} (see {})'.format('Scan errors',
                                           self.scan_errors,
                                           logging.getLogger().handlers[0].baseFilename))
            sys.exit(1)
        if self.any_undeclared:
            print('Please update "esg.{}.ini" following: {}'.format(self.project,
                                                                    logging.getLogger().handlers[0].baseFilename))
            sys.exit(2)

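# How the facet harvesting in __enter__ works: the translated pattern is a
# regex with one named group per facet, and IGNORED_KEYS prunes technical
# groups. A standalone sketch with an invented pattern and key list:
import re

IGNORED_KEYS = ['root', 'filename']
pattern = r'/(?P<root>[\w.-]+)/(?P<project>[\w.-]+)/(?P<model>[\w.-]+)/(?P<filename>[\w.-]+\.nc)$'
facets = set(re.compile(pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
print(sorted(facets))  # ['model', 'project']
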
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        self.pbar = args.pbar
        self.config_dir = args.i
        self.project = args.project
        self.directory = args.directory
        self.mapfile_name = args.mapfile
        self.outdir = args.outdir
        self.notes_title = args.tech_notes_title
        self.notes_url = args.tech_notes_url
        self.no_version = args.no_version
        self.threads = args.max_threads
        self.use_pool = (self.threads > 1)
        self.dataset = args.dataset
        if not args.no_cleanup:
            self.clean()
        self.no_cleanup = args.no_cleanup
        self.no_checksum = args.no_checksum
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, False) for f in args.include_file])
        else:
            # Default includes netCDF files only
            self.file_filter.append((r'^.*\.nc$', False))
        if args.exclude_file:
            # Default excludes hidden files
            self.file_filter.extend([(f, True) for f in args.exclude_file])
        else:
            self.file_filter.append((r'^\..*$', True))
        self.all = args.all_versions
        if self.all:
            self.no_version = False
        self.version = None
        if args.version:
            self.version = 'v{}'.format(args.version)
        if args.latest_symlink:
            self.version = 'latest'
        self.scan_errors = None
        self.scan_files = None
        self.scan_err_log = logging.getLogger().handlers[0].baseFilename
        self.nb_map = None

    def __enter__(self):
        # Get checksum client
        self.checksum_type = self.get_checksum_type()
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        self.facets = self.cfg.get_facets('dataset_id')
        self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
        # Get mapfile DRS if set in the configuration file
        try:
            self.mapfile_drs = self.cfg.get('mapfile_drs')
        except NoConfigOption:
            self.mapfile_drs = None
        # Init data collector
        if self.pbar:
            self.sources = VersionedPathCollector(sources=self.directory,
                                                  data=self,
                                                  dir_format=self.cfg.translate('directory_format'))
        else:
            self.sources = VersionedPathCollector(sources=self.directory,
                                                  data=self,
                                                  spinner=False,
                                                  dir_format=self.cfg.translate('directory_format'))
        # Init file filter
        for file_filter in self.file_filter:
            self.sources.FileFilter[uuid()] = file_filter
        # Init dir filter
        self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
        if self.all:
            # Pick up all encountered versions by adding "/latest" exclusion
            self.sources.PathFilter['version_filter'] = ('/latest', True)
        elif self.version:
            # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
            # If --latest-symlink, --version is set to "latest"
            self.sources.PathFilter['version_filter'] = '/{}'.format(self.version)
        # Init progress bar
        if self.pbar:
            nfiles = len(self.sources)
            self.pbar = tqdm(desc='Mapfile(s) generation',
                             total=nfiles,
                             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                             ncols=100,
                             file=sys.stdout)
        # Init threads pool
        if self.use_pool:
            self.pool = ThreadPool(int(self.threads))
        return self

    def __exit__(self, *exc):
        # Close threads pool
        if self.use_pool:
            self.pool.close()
            self.pool.join()
        # Decline outputs depending on the scan results
        # Raise errors when one or several files have been skipped or failed
        # Default is sys.exit(0)
        if self.scan_files and not self.scan_errors:
            # All files have been successfully scanned
            # Print number of generated mapfiles
            if self.pbar:
                print('{}: {} (see {})'.format('Mapfile(s) generated', self.nb_map, self.outdir))
            logging.info('{} mapfile(s) generated'.format(self.nb_map))
            logging.info('==> Scan completed ({} file(s) scanned)'.format(self.scan_files))
        if not self.scan_files and not self.scan_errors:
            # Results list is empty = no files scanned/found
            if self.pbar:
                print('No files found')
            logging.warning('==> No files found')
            sys.exit(1)
        if self.scan_files and self.scan_errors:
            # Print number of scan errors in any case
            if self.pbar:
                print('{}: {} (see {})'.format('Scan errors', self.scan_errors, self.scan_err_log))
            logging.warning('{} file(s) have been skipped (see {})'.format(self.scan_errors,
                                                                           self.scan_err_log))
            if self.scan_errors == self.scan_files:
                # All files have been skipped or failed during the scan
                logging.warning('==> All files have been ignored or have failed leading to no mapfile.')
                sys.exit(3)
            else:
                # Some files have been skipped or failed during the scan
                logging.info('==> Scan completed ({} file(s) scanned)'.format(self.scan_files))
                sys.exit(2)

    def get_checksum_type(self):
        """
        Gets the checksum type to use.
        Be careful with Exception constants when reading two different sections.

        :returns: The checksum type
        :rtype: *str*

        """
        if self.no_checksum:
            return None
        _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
        if _cfg.has_option('checksum', section='DEFAULT'):
            checksum_type = _cfg.get_options_from_table('checksum')[0][1].lower()
        else:
            # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
            checksum_type = 'sha256'
        if checksum_type not in checksum_types:
            raise InvalidChecksumType(checksum_type)
        return checksum_type

    def clean(self):
        """
        Clean directory from incomplete mapfiles.
        Incomplete mapfiles from a previous run are silently removed.

        """
        for root, _, filenames in os.walk(self.outdir):
            for filename in fnmatch.filter(filenames, '*{}'.format(WORKING_EXTENSION)):
                os.remove(os.path.join(root, filename))
        logging.info('{} cleaned'.format(self.outdir))

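# Sketch of the clean() traversal with the standard library on a throwaway
# directory (WORKING_EXTENSION is assumed to be a suffix such as '.part'
# marking in-progress mapfiles):
import fnmatch
import os
import tempfile

WORKING_EXTENSION = '.part'
outdir = tempfile.mkdtemp()
open(os.path.join(outdir, 'dataset1.map.part'), 'w').close()
open(os.path.join(outdir, 'dataset2.map'), 'w').close()
for root, _, filenames in os.walk(outdir):
    for filename in fnmatch.filter(filenames, '*{}'.format(WORKING_EXTENSION)):
        os.remove(os.path.join(root, filename))
print(sorted(os.listdir(outdir)))  # ['dataset2.map']
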
def __enter__(self):
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    self.facets = self.cfg.get_facets('dataset_id')
    # Get mapfile DRS if set in the configuration file
    try:
        _cfg = SectionParser(section='config:{}'.format(self.project),
                             directory=self.config_dir)
        self.mapfile_drs = _cfg.get('mapfile_drs')
    except (NoConfigOption, NoConfigSection):
        self.mapfile_drs = None
    # Init data collector
    if self.directory:
        # The source is a list of directories
        # Instantiate file collector to walk through the tree
        self.source_type = 'file'
        if self.pbar:
            self.sources = VersionedPathCollector(sources=self.directory,
                                                  data=self,
                                                  dir_format=self.cfg.translate('directory_format'))
        else:
            self.sources = VersionedPathCollector(sources=self.directory,
                                                  data=self,
                                                  spinner=False,
                                                  dir_format=self.cfg.translate('directory_format'))
        # Translate directory format pattern
        self.pattern = self.cfg.translate('directory_format', add_ending_filename=True)
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        if self.all:
            # Pick up all encountered versions by adding "/latest" exclusion
            self.sources.PathFilter.add(name='version_filter', regex='/latest', inclusive=False)
        elif self.version:
            # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
            # If --latest-symlink, --version is set to "latest"
            self.sources.PathFilter.add(name='version_filter', regex='/{}'.format(self.version))
        else:
            # Default behavior: pick up the latest version among encountered versions
            self.sources.default = True
    elif self.dataset_list:
        # The source is a list of datasets from a TXT file
        self.source_type = 'dataset'
        self.sources = DatasetCollector(sources=[x.strip() for x in self.dataset_list.readlines() if x.strip()],
                                        data=self,
                                        spinner=False)
        # Translate dataset_id format
        self.pattern = self.cfg.translate('dataset_id', add_ending_version=True, sep='.')
    else:
        # The source is a dataset ID (potentially from stdin)
        self.source_type = 'dataset'
        self.sources = DatasetCollector(sources=[self.dataset_id], data=self, spinner=False)
        # Translate dataset_id format
        self.pattern = self.cfg.translate('dataset_id', add_ending_version=True, sep='.')
    # Init progress bar
    nfiles = len(self.sources)
    if self.pbar and nfiles:
        self.pbar = tqdm(desc='Mapfile(s) generation',
                         total=nfiles,
                         bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} ' + SOURCE_TYPE[self.source_type],
                         ncols=100,
                         file=sys.stdout)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self

class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        self.pbar = args.pbar
        self.config_dir = args.i
        self.directory = args.directory
        self.root = os.path.normpath(args.root)
        self.rescan = args.rescan
        self.commands_file = args.commands_file
        self.overwrite_commands_file = args.overwrite_commands_file
        self.upgrade_from_latest = args.upgrade_from_latest
        self.set_values = {}
        if args.set_value:
            self.set_values = dict(args.set_value)
        self.set_keys = {}
        if args.set_key:
            self.set_keys = dict(args.set_key)
        self.threads = args.max_threads
        self.use_pool = (self.threads > 1)
        self.project = args.project
        self.action = args.action
        if args.copy:
            self.mode = 'copy'
        elif args.link:
            self.mode = 'link'
        elif args.symlink:
            self.mode = 'symlink'
        else:
            self.mode = 'move'
        self.version = args.version
        DRSPath.TREE_VERSION = 'v{}'.format(args.version)
        self.scan = True
        self.scan_errors = None
        self.scan_files = None
        self.scan_err_log = logging.getLogger().handlers[0].baseFilename
        if self.commands_file and self.action != 'todo':
            print('"{}" action ignores "--commands-file" argument.'.format(self.action))
            self.commands_file = None
        if self.overwrite_commands_file and not self.commands_file:
            print('--overwrite-commands-file ignored')

    def __enter__(self):
        # Get checksum client
        self.checksum_type = self.get_checksum_type()
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        # Check if --commands-file argument specifies an existing file
        self.check_existing_commands_file()
        # Warn user about unconsidered hard-coded elements
        for pattern_element in self.cfg.get('directory_format').strip().split("/"):
            if not re.match(re.compile(r'%\([\w]+\)s'), pattern_element):
                msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                      'are not supported.'.format(pattern_element)
                if self.pbar:
                    print(msg)
                logging.warning(msg)
                break
        self.facets = self.cfg.get_facets('directory_format')
        self.pattern = self.cfg.translate('filename_format')
        # Init DRS tree
        self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
        # Disable file scan if a previous DRS tree has been generated with the same context and no "list" action
        if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
            reader = load(TREE_FILE)
            old_args = next(reader)
            # Ensure that processing context is similar to previous step
            if self.check_args(old_args):
                self.scan = False
        # Init data collector
        if self.pbar:
            self.sources = Collector(sources=self.directory, data=self)
        else:
            self.sources = Collector(sources=self.directory, spinner=False, data=self)
        # Init file filter
        # Only supports netCDF files
        self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
        # And exclude hidden files
        self.sources.FileFilter[uuid()] = (r'^\..*$', True)
        # Init progress bar
        if self.pbar:
            nfiles = len(self.sources)
            self.pbar = tqdm(desc='Scanning incoming files',
                             total=nfiles,
                             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                             ncols=100,
                             file=sys.stdout)
        # Init threads pool
        if self.use_pool:
            self.pool = ThreadPool(int(self.threads))
        return self

    def check_existing_commands_file(self):
        """
        Check for an existing commands file and, depending on the
        ``--overwrite-commands-file`` setting, either delete it or exit
        with a fatal error.

        """
        if self.commands_file and os.path.exists(self.commands_file):
            if self.overwrite_commands_file:
                os.remove(self.commands_file)
            else:
                print("File '{}' already exists and '--overwrite-commands-file' "
                      "option not used.".format(self.commands_file))
                sys.exit(1)

    def __exit__(self, *exc):
        # Close threads pool
        if self.use_pool:
            self.pool.close()
            self.pool.join()
        # Decline outputs depending on the scan results
        # Raise errors when one or several files have been skipped or failed
        # Default is sys.exit(0)
        if self.scan_files and not self.scan_errors:
            # All files have been successfully scanned
            logging.info('==> Scan completed ({} file(s) scanned)'.format(self.scan_files))
        if not self.scan_files and not self.scan_errors:
            # Results list is empty = no files scanned/found
            if self.pbar:
                print('No files found')
            logging.warning('==> No files found')
            sys.exit(1)
        if self.scan_files and self.scan_errors:
            if self.scan:
                msg = 'Scan errors: {} (see {})'
            else:
                msg = 'Original scan errors: {} (previously written to {})'
            # Print number of scan errors in any case
            if self.pbar:
                print(msg.format(self.scan_errors, self.scan_err_log))
            logging.warning('{} file(s) have been skipped (see {})'.format(self.scan_errors,
                                                                           self.scan_err_log))
            if self.scan_errors == self.scan_files:
                # All files have been skipped or failed during the scan
                logging.warning('==> All files have been ignored or have failed leading to no DRS tree.')
                sys.exit(3)
            else:
                # Some files have been skipped or failed during the scan
                logging.info('==> Scan completed ({} file(s) scanned)'.format(self.scan_files))
                sys.exit(2)

    def get_checksum_type(self):
        """
        Gets the checksum type to use.
        Be careful with Exception constants when reading two different sections.

        :returns: The checksum type
        :rtype: *str*

        """
        _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
        if _cfg.has_option('checksum', section='DEFAULT'):
            checksum_type = _cfg.get_options_from_table('checksum')[0][1].lower()
        else:
            # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
            checksum_type = 'sha256'
        if checksum_type not in checksum_types:
            raise InvalidChecksumType(checksum_type)
        return checksum_type

    def check_args(self, old_args):
        """
        Checks command-line arguments to avoid discrepancies between ``esgprep drs`` steps.

        :param *dict* old_args: The recorded arguments
        :raises Error: If one argument differs

        """
        for k in CONTROLLED_ARGS:
            if self.__getattribute__(k) != old_args[k]:
                logging.warning('"{}" argument has changed: "{}" instead of "{}". '
                                'File rescan needed.'.format(k,
                                                             self.__getattribute__(k),
                                                             old_args[k]))
                return False
        return True

def declare_map(config, facet):
    maps = []
    if config.has_option('maps'):
        # Use a list comprehension so "maps" stays appendable (map() returns
        # an iterator on Python 3)
        maps = [m.strip() for m in config.get('maps').split(',')]
    maps.append('{}_map'.format(facet))
    config.set('maps', build_line(tuple(maps), sep=', '))


if __name__ == "__main__":
    args = get_args()
    auth = HTTPBasicAuth(args.gh_user, args.gh_password) if args.gh_user and args.gh_password else None
    config = SectionParser(section='project:{}'.format(args.project))
    # Get all facet keys from format elements
    facets = get_facets()
    config.set('categories', get_categories(facets), newline=True)
    defaults = [('project', 'CMIP6')]
    defaults = tuple([build_line(default, length=lengths(defaults), indent=True)
                      for default in sorted(defaults)])
    config.set('category_defaults', build_line(defaults, sep='\n'), newline=True)
    config.set('filename_format', FILENAME_FORMAT)
    config.set('directory_format', DIRECTORY_FORMAT)
    config.set('dataset_id', DATASET_ID)
    config.set('dataset_name_format', DATASET_FORMAT)

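# What declare_map() produces, sketched with plain string handling instead of
# the project's SectionParser/build_line helpers (their exact output format
# is an assumption):
existing = 'institute_map'  # current value of the "maps" option, if any
maps = [m.strip() for m in existing.split(',')]
maps.append('{}_map'.format('model'))
print(', '.join(maps))  # institute_map, model_map
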
def __enter__(self):
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies an existing file
    self.check_existing_commands_file()
    # Get DRS facets
    self.facets = self.cfg.get_facets('directory_format')
    # Raise error when %(version)s is not part of the final directory format
    if 'version' not in self.facets:
        raise NoVersionPattern(self.cfg.get('directory_format'), self.facets)
    # Consider hard-coded elements in directory format
    idx = 0
    for pattern_element in self.cfg.get('directory_format').strip().split("/"):
        try:
            # If pattern is %(...)s, get its index in the list of facets
            key = re.match(re.compile(r'%\(([\w]+)\)s'), pattern_element).groups()[0]
            idx = self.facets.index(key)
        except AttributeError:
            # If pattern is not %(...)s, generate a uuid()
            key = str(uuid())
            # Insert hard-coded string in self.facets to be part of DRS path
            self.facets.insert(idx + 1, key)
            # Set the value using --set-value
            self.set_values[key] = pattern_element
            # Add the uuid to the ignored keys
            IGNORED_KEYS.append(key)
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated with the same context and no "list" action
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter.add(regex=r'^.*\.nc$')
    # And exclude hidden files
    self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
    # Init progress bar
    if self.scan:
        nfiles = len(self.sources)
        if self.pbar and nfiles:
            self.pbar = tqdm(desc='Scanning incoming files',
                             total=nfiles,
                             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                             ncols=100,
                             file=sys.stdout)
    else:
        msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
              'Using cached DRS tree from {}'.format(TREE_FILE)
        if self.pbar:
            print(msg)
        logging.warning(msg)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self

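# How hard-coded path elements are folded into the facet list above: every
# element that is not a %(facet)s placeholder becomes a synthetic facet keyed
# by a uuid with a fixed value. A standalone sketch with an invented
# directory_format:
import re
from uuid import uuid4 as uuid

directory_format = '%(root)s/%(project)s/output/%(model)s'
facets, set_values, idx = [], {}, 0
for element in directory_format.strip().split('/'):
    match = re.match(r'%\(([\w]+)\)s', element)
    if match:
        facets.append(match.group(1))
        idx = len(facets) - 1
    else:
        key = str(uuid())
        facets.insert(idx + 1, key)
        set_values[key] = element
print(facets[2] in set_values, list(set_values.values()))  # True ['output']
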
class BaseContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        # Init print management
        Print.init(log=args.log, debug=args.debug, all=args.all, cmd=args.prog)
        # Print command-line
        Print.command()
        self._process_color_arg(args)
        # Get project and related configuration
        self.project = args.project
        self.config_dir = args.i
        self.processes = args.max_processes if args.max_processes <= cpu_count() else cpu_count()
        self.use_pool = (self.processes != 1)
        self.lock = Lock()
        self.nbfiles = 0
        self.nbskip = 0
        self.nberrors = 0
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, True) for f in args.include_file])
        else:
            # Default includes netCDF files only
            self.file_filter.append((r'^.*\.nc$', True))
        if args.exclude_file:
            # Default excludes hidden files
            self.file_filter.extend([(f, False) for f in args.exclude_file])
        else:
            self.file_filter.append((r'^\..*$', False))
        self.dir_filter = args.ignore_dir
        # Init process manager
        if self.use_pool:
            manager = SyncManager()
            manager.start()
            Print.BUFFER = manager.Value(c_char_p, '')
            self.progress = manager.Value('i', 0)
        else:
            self.progress = Value('i', 0)
        self.tunits_default = None
        if self.project in DEFAULT_TIME_UNITS.keys():
            self.tunits_default = DEFAULT_TIME_UNITS[self.project]
        # Change frequency increment
        if args.set_inc:
            for table, frequency, increment, units in args.set_inc:
                if table not in set(zip(*FREQ_INC.keys())[0]):
                    raise InvalidTable(table)
                if frequency not in set(zip(*FREQ_INC.keys())[1]):
                    raise InvalidFrequency(frequency)
                keys = [(table, frequency)]
                if table == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[1] == frequency]
                if frequency == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[0] == table]
                for key in keys:
                    FREQ_INC[key] = [float(increment), str(units)]
        # Get reference time properties if submitted
        # Default is to deduce them from first file scanned
        self.ref_calendar = args.calendar
        self.ref_units = args.units
        # Init collector
        self.sources = None

    def __enter__(self):
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Exclude fixed frequency in any case
        self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)',
                                    inclusive=False)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        # Set driving time properties
        tinit = TimeInit(ref=self.sources.first(), tunits_default=self.tunits_default)
        if not self.ref_calendar:
            self.ref_calendar = tinit.calendar
        if not self.ref_units:
            self.ref_units = tinit.tunits
        # Get project id
        if not self.project:
            self.project = get_project(self.sources.first())
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        self.pattern = self.cfg.translate('filename_format')
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        # Decline outputs depending on the scan results
        msg = COLORS.HEADER('Number of file(s) scanned: {}\n'.format(self.nbfiles))
        m = 'Number of file(s) skipped: {}'.format(self.nbskip)
        if self.nbskip:
            msg += COLORS.FAIL(m)
        else:
            msg += COLORS.SUCCESS(m)
        # Print summary
        Print.summary(msg)
        # Print log path if exists
        Print.log()

    def _process_color_arg(self, args):
        # Process --color / --no-color arg if present
        if 'color' in args and args.color:
            enable_colors()
        if 'no_color' in args and args.no_color:
            disable_colors()

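# The '--set-inc' handling above fans one (table, frequency) pair out to all
# matching FREQ_INC keys; a standalone sketch with an invented FREQ_INC table
# (the real contents differ):
FREQ_INC = {('Amon', 'mon'): [1.0, 'month'],
            ('Omon', 'mon'): [1.0, 'month'],
            ('day', 'day'): [1.0, 'day']}
table, frequency = 'all', 'mon'
keys = [(table, frequency)]
if table == 'all':
    keys = [k for k in FREQ_INC.keys() if k[1] == frequency]
for key in keys:
    FREQ_INC[key] = [6.0, 'hour']
print(sorted(k for k in FREQ_INC if FREQ_INC[k] == [6.0, 'hour']))  # [('Amon', 'mon'), ('Omon', 'mon')]
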
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        self.pbar = args.pbar
        self.project = args.project
        self.config_dir = args.i
        self.directory = args.directory
        self.dataset_list = args.dataset_list
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, True) for f in args.include_file])
        else:
            # Default includes netCDF files only
            self.file_filter.append((r'^.*\.nc$', True))
        if args.exclude_file:
            # Default excludes hidden files
            self.file_filter.extend([(f, False) for f in args.exclude_file])
        else:
            self.file_filter.append((r'^\..*$', False))
        self.scan_errors = 0
        self.any_undeclared = False

    def __enter__(self):
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        # Init data collector
        if self.directory:
            # The source is a list of directories
            # Instantiate file collector to walk through the tree
            self.source_type = 'files'
            if self.pbar:
                self.sources = PathCollector(sources=self.directory)
            else:
                self.sources = PathCollector(sources=self.directory, spinner=False)
            # Init file filter
            for regex, inclusive in self.file_filter:
                self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
            # Init dir filter
            self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
            self.pattern = self.cfg.translate('directory_format', add_ending_filename=True)
        else:
            # The source is a list of files (i.e., several dataset lists)
            # Instantiate dataset collector to parse the files
            self.source_type = 'datasets'
            sources = [x.strip() for x in self.dataset_list.readlines() if x.strip()]
            if self.pbar:
                self.sources = DatasetCollector(source=sources, versioned=False)
            else:
                self.sources = DatasetCollector(source=sources, spinner=False, versioned=False)
            self.pattern = self.cfg.translate('dataset_id')
        # Get the facet keys from pattern
        self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
        # Init progress bar
        nfiles = len(self.sources)
        if self.pbar and nfiles:
            self.sources = tqdm(self.sources,
                                desc='Harvesting facets values from data',
                                total=nfiles,
                                bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} ' + self.source_type,
                                ncols=100,
                                file=sys.stdout)
        return self

    def __exit__(self, *exc):
        # Default is sys.exit(0)
        if self.scan_errors > 0:
            print('{}: {} (see {})'.format('Scan errors',
                                           self.scan_errors,
                                           logging.getLogger().handlers[0].baseFilename))
            sys.exit(1)
        if self.any_undeclared:
            print('Please update "esg.{}.ini" following: {}'.format(self.project,
                                                                    logging.getLogger().handlers[0].baseFilename))
            sys.exit(2)