def list_filepaths(self, wildcard=None):
    """
    Return the list of filepaths of the regular files found in the directory.

    Each path is built by joining ``self.path`` with the directory entry, so
    the result is absolute only if ``self.path`` itself is absolute.

    Args:
        wildcard: String of tokens separated by "|". Each token represents a pattern.
            If wildcard is not None, we return only those files whose basename matches
            the given shell pattern (uses fnmatch).
            Example:
              wildcard="*.nc|*.pdf" selects only those files that end with .nc or .pdf

    Returns:
        A ``list`` of strings. A real list is returned in every case, also when
        ``wildcard`` is None, so that callers can index it, take its length or
        iterate over it more than once.
    """
    # Select the regular files in the directory (sub-directories are skipped).
    candidates = (os.path.join(self.path, name) for name in os.listdir(self.path))
    filepaths = [path for path in candidates if os.path.isfile(path)]

    if wildcard is not None:
        # Filter using shell patterns applied to the basename only.
        w = WildCard(wildcard)
        filepaths = [path for path in filepaths if w.match(os.path.basename(path))]

    return filepaths
def parse(self, filename, verbose=0):
    """
    Parse the given file. Return :class:`EventReport`.

    Every YAML document produced by :class:`YamlTokenizer` whose tag matches
    one of the Error/Warning/Comment/Bug patterns is loaded and appended to
    the report. Documents that cannot be loaded are not dropped: they are
    replaced by an :class:`AbinitYamlError` (if the tag contains "error",
    case-insensitive) or by an :class:`AbinitYamlWarning` carrying the raw text.

    Args:
        filename: Path of the file to parse. It is converted to an absolute path.
        verbose: If non-zero, the output of ``straceback()`` is appended to the
            message of the events generated for malformatted YAML documents.

    Returns:
        The :class:`EventReport` built for ``filename``. The run is flagged as
        completed only if a document tagged ``!FinalSummary`` has been found.
    """
    run_completed, start_datetime, end_datetime = False, None, None
    filename = os.path.abspath(filename)
    report = EventReport(filename)

    w = WildCard("*Error|*Warning|*Comment|*Bug|*ERROR|*WARNING|*COMMENT|*BUG")

    # NOTE: this changes the warning filters of the whole process.
    import warnings
    warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)

    with YamlTokenizer(filename) as tokens:
        for doc in tokens:
            if w.match(doc.tag):
                try:
                    # Can't use ruamel safe_load!
                    # NOTE(review): yaml.load may instantiate arbitrary objects,
                    # only parse files coming from a trusted source.
                    event = yaml.load(doc.text)
                except Exception:
                    # `except Exception` (instead of a bare except) so that
                    # KeyboardInterrupt/SystemExit are not swallowed here.
                    # Wrong YAML doc. Check the doc tag and instantiate the proper event.
                    message = "Malformatted YAML document at line: %d\n" % doc.lineno
                    message += doc.text

                    # This call is very expensive when we have many exceptions due to malformatted YAML docs.
                    if verbose:
                        message += "Traceback:\n %s" % straceback()

                    if "error" in doc.tag.lower():
                        print("It seems an error. doc.tag:", doc.tag)
                        event = AbinitYamlError(message=message, src_file=__file__, src_line=0)
                    else:
                        event = AbinitYamlWarning(message=message, src_file=__file__, src_line=0)

                event.lineno = doc.lineno
                report.append(event)

            # Check whether the calculation completed.
            if doc.tag == "!FinalSummary":
                run_completed = True
                d = doc.as_dict()
                start_datetime, end_datetime = d["start_datetime"], d["end_datetime"]

    report.set_run_completed(run_completed, start_datetime, end_datetime)
    return report
def from_files_and_dir(cls, parent, filenames=None, dirpath=None, walk=True, wildcard=""):
    """
    Alternate constructor that reads the content of the files/directory specified in input.

    Args:
        parent: Forwarded unchanged as first argument of ``cls``.
        filenames: List of files to show in the notebook. Defaults to an empty list.
        dirpath: Directory to scan for additional files.
        walk: Used only if dirpath is not None.
            If True, we scan all the files contained within dirpath (recursively)
            and we add them to the list if their name matches one of the patterns
            given in wildcard.
            If False, only the regular files located directly inside dirpath
            are considered.
        wildcard: String with patterns separated by `|`.
            Only the files matching one of the patterns will be shown.
            example: wildcard='*.nc|*.txt' shows only the files whose extension is in ['nc', 'txt'].

    Returns:
        ``cls(parent, text_list, page_names=filenames)`` where ``text_list``
        contains the text of each selected file, in the same order as the names.
    """
    # Local import: io.open accepts `encoding`/`errors` on every Python version.
    import io

    wildcard = WildCard(wildcard)

    if filenames is None:
        filenames = []

    # filter returns a new list, hence the list received in input is never modified.
    filenames = wildcard.filter(filenames)

    if dirpath is not None:
        if not walk:
            # os.listdir returns basenames: join them with dirpath, otherwise
            # open() would look for the files in the current working directory.
            # Sub-directories are discarded because they cannot be read as text.
            for fname in wildcard.filter(os.listdir(dirpath)):
                path = os.path.join(dirpath, fname)
                if os.path.isfile(path):
                    filenames.append(path)
        else:
            for root, dirnames, fnames in os.walk(dirpath):
                for fname in fnames:
                    if wildcard.match(fname):
                        filenames.append(os.path.join(root, fname))

    # Open the files and read the content in a string.
    text_list = []
    for fname in filenames:
        # Sanitize strings: decode as utf-8 and use "ignore" to skip invalid bytes.
        # (calling .decode on the value returned by read() fails on Python 3,
        # where a file opened in text mode already returns str).
        with io.open(fname, "r", encoding="utf-8", errors="ignore") as fh:
            text_list.append(fh.read())

    return cls(parent, text_list, page_names=filenames)
def from_dojodir(cls, top, exclude_wildcard=None, exclude_basenames=None):
    """
    Initialize the table from one of the top level directories located
    in the pseudo_dojo.pseudos directory.

    Args:
        top: top level directory. It must contain an ``__init__.py`` file
            defining ``pseudo_ext`` (the extension of the pseudopotential files).
        exclude_basenames: Optional string or list of strings with the
            pseudo basenames to be excluded.
        exclude_wildcard: String of tokens separated by "|". Each token represents a pattern
            to be excluded.
            Example:
              exclude_wildcard="*_r.psp8|*.xml" selects only those files that do not end with _r.psp8 or .xml

    Returns:
        ``cls(pseudos).sort_by_z()`` where ``pseudos`` is the list of objects
        returned by ``dojopseudo_from_file``. Files for which the parser
        returns None are reported on stdout and skipped.

    Raises:
        RuntimeError: if ``__init__.py`` is missing in ``top``.

    .. warning::

        The table may contain multiple pseudos for a given chemical element.
        Don't use this method unless you need this feature and you know what you are doing.
    """
    # importlib replaces the `imp` module that has been removed in Python 3.12.
    import importlib.util

    # Read metadata from the __init__.py file.
    init_path = os.path.join(top, "__init__.py")
    if not os.path.isfile(init_path):
        raise RuntimeError("__init__.py file is missing in dir: %s" % top)

    # The file path is used as module name (as done previously with imp.load_source).
    # Unlike imp.load_source, the module is not registered in sys.modules.
    spec = importlib.util.spec_from_file_location(init_path, init_path)
    meta = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(meta)

    # Gather all pseudos located in the sub-directories of `top`
    # whose name is the symbol of a chemical element.
    all_symbols = set(e.symbol for e in Element)
    dirs = [os.path.join(top, d) for d in os.listdir(top) if d in all_symbols]

    exclude = set(list_strings(exclude_basenames)) if exclude_basenames is not None else set()

    paths = []
    for dr in dirs:
        paths.extend(os.path.join(dr, f) for f in os.listdir(dr)
                     if f.endswith(meta.pseudo_ext) and f not in exclude)

    if exclude_wildcard is not None:
        # The patterns are matched against the basename, not the full path.
        wild = WildCard(exclude_wildcard)
        paths = [p for p in paths if not wild.match(os.path.basename(p))]

    pseudos = []
    for p in paths:
        pseudo = dojopseudo_from_file(p)
        if pseudo is None:
            print("Error while parsing:", p)
            continue
        pseudos.append(pseudo)

    return cls(pseudos).sort_by_z()
def from_dojodir(cls, top, exclude_wildcard=None, exclude_basenames=None):
    """
    Initialize the table from one of the top level directories located
    in the pseudo_dojo.pseudos directory.

    Args:
        top: top level directory. It must contain an ``__init__.py`` file
            defining ``pseudo_ext`` (the extension of the pseudopotential files).
        exclude_basenames: Optional string or list of strings with the
            pseudo basenames to be excluded.
        exclude_wildcard: String of tokens separated by "|". Each token represents a pattern
            to be excluded.
            Example:
              exclude_wildcard="*_r.psp8|*.xml" selects only those files that do not end with _r.psp8 or .xml

    Returns:
        ``cls(pseudos).sort_by_z()`` where ``pseudos`` is the list of objects
        returned by ``dojopseudo_from_file``. Files for which the parser
        returns None are reported on stdout and skipped.

    Raises:
        RuntimeError: if ``__init__.py`` is missing in ``top``.

    .. warning::

        The table may contain multiple pseudos for a given chemical element.
        Don't use this method unless you need this feature and you know what you are doing.
    """
    # `imp` has been removed in Python 3.12: load the metadata module with importlib.
    import importlib.util

    # Read metadata from the __init__.py file.
    metadata_file = os.path.join(top, "__init__.py")
    if not os.path.isfile(metadata_file):
        raise RuntimeError("__init__.py file is missing in dir: %s" % top)

    # As with the previous imp.load_source call, the module is named after its path.
    # The module is executed but, unlike imp.load_source, not added to sys.modules.
    spec = importlib.util.spec_from_file_location(metadata_file, metadata_file)
    meta = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(meta)

    # Gather all pseudos found in the element directories (H, He, ...) inside `top`.
    all_symbols = set(e.symbol for e in Element)
    dirs = [os.path.join(top, d) for d in os.listdir(top) if d in all_symbols]

    exclude = set(list_strings(exclude_basenames)) if exclude_basenames is not None else set()

    paths = []
    for dr in dirs:
        paths.extend(os.path.join(dr, f) for f in os.listdir(dr)
                     if f.endswith(meta.pseudo_ext) and f not in exclude)

    if exclude_wildcard is not None:
        # Shell patterns are applied to the basename of each file.
        wild = WildCard(exclude_wildcard)
        paths = [p for p in paths if not wild.match(os.path.basename(p))]

    pseudos = []
    for p in paths:
        pseudo = dojopseudo_from_file(p)
        if pseudo is None:
            print("Error while parsing:", p)
            continue
        pseudos.append(pseudo)

    return cls(pseudos).sort_by_z()
def parse(self, filename, verbose=0):
    """
    Parse the given file. Return :class:`EventReport`.

    The file is split in YAML documents by :class:`YamlTokenizer`. Documents
    whose tag matches one of the Error/Warning/Comment/Bug patterns are loaded
    with ``yaml.load`` and appended to the report. If the document cannot be
    loaded, a substitute event containing the raw text is appended instead:
    :class:`AbinitYamlError` when the tag contains "error" (case-insensitive),
    :class:`AbinitYamlWarning` otherwise.

    Args:
        filename: Path of the file to parse. It is converted to an absolute path.
        verbose: If non-zero, the output of ``straceback()`` is appended to the
            message of the events generated for malformatted YAML documents.

    Returns:
        The :class:`EventReport` built for ``filename``. ``set_run_completed``
        receives True, together with the start/end datetimes, only if a
        document tagged ``!FinalSummary`` has been found.
    """
    run_completed, start_datetime, end_datetime = False, None, None
    filename = os.path.abspath(filename)
    report = EventReport(filename)

    w = WildCard("*Error|*Warning|*Comment|*Bug|*ERROR|*WARNING|*COMMENT|*BUG")

    # NOTE: simplefilter modifies the global warning filters of the interpreter.
    import warnings
    warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning)

    with YamlTokenizer(filename) as tokens:
        for doc in tokens:
            if w.match(doc.tag):
                try:
                    # Can't use ruamel safe_load!
                    # NOTE(review): yaml.load is unsafe on untrusted input since
                    # it may instantiate arbitrary objects.
                    event = yaml.load(doc.text)
                except Exception:
                    # A bare `except:` would also trap KeyboardInterrupt and
                    # SystemExit; only real errors must be converted into events.
                    # Wrong YAML doc. Check the doc tag and instantiate the proper event.
                    message = "Malformatted YAML document at line: %d\n" % doc.lineno
                    message += doc.text

                    # This call is very expensive when we have many exceptions due to malformatted YAML docs.
                    if verbose:
                        message += "Traceback:\n %s" % straceback()

                    if "error" in doc.tag.lower():
                        print("It seems an error. doc.tag:", doc.tag)
                        event = AbinitYamlError(message=message, src_file=__file__, src_line=0)
                    else:
                        event = AbinitYamlWarning(message=message, src_file=__file__, src_line=0)

                event.lineno = doc.lineno
                report.append(event)

            # Check whether the calculation completed.
            if doc.tag == "!FinalSummary":
                run_completed = True
                d = doc.as_dict()
                start_datetime, end_datetime = d["start_datetime"], d["end_datetime"]

    report.set_run_completed(run_completed, start_datetime, end_datetime)
    return report
def parse(self, filename):
    """
    Parse the given file. Return :class:`EventReport`.

    Every YAML document produced by :class:`YamlTokenizer` whose tag matches
    one of the Error/Warning/Comment/Bug patterns is loaded and appended to
    the report. Documents that cannot be loaded are replaced by an
    :class:`AbinitYamlError` (if the tag contains "error", case-insensitive)
    or by an :class:`AbinitYamlWarning` whose message contains the raw text.
    If ``self.DEBUG_LEVEL`` is true, the output of ``straceback()`` is
    appended to that message.

    Args:
        filename: Path of the file to parse. It is converted to an absolute path.

    Returns:
        The :class:`EventReport` built for ``filename``. The run is flagged as
        completed only if a document tagged ``!FinalSummary`` has been found.
    """
    run_completed = False
    filename = os.path.abspath(filename)
    report = EventReport(filename)

    # TODO Use CamelCase for the Fortran messages.
    # Bug is still an error of class SoftwareError
    w = WildCard("*Error|*Warning|*Comment|*Bug|*ERROR|*WARNING|*COMMENT|*BUG")

    with YamlTokenizer(filename) as tokens:
        for doc in tokens:
            if w.match(doc.tag):
                try:
                    # NOTE(review): yaml.load may instantiate arbitrary objects,
                    # only parse files coming from a trusted source.
                    event = yaml.load(doc.text)
                except Exception:
                    # `except Exception` (instead of a bare except) so that
                    # KeyboardInterrupt/SystemExit are not swallowed here.
                    # Wrong YAML doc. Check the doc tag and instantiate the proper event.
                    message = "Malformatted YAML document at line: %d\n" % doc.lineno
                    message += doc.text

                    # This call is very expensive when we have many exceptions due to malformatted YAML docs.
                    if self.DEBUG_LEVEL:
                        message += "Traceback:\n %s" % straceback()

                    if "error" in doc.tag.lower():
                        print("It seems an error", doc.tag)
                        event = AbinitYamlError(message=message, src_file=__file__, src_line=0)
                    else:
                        event = AbinitYamlWarning(message=message, src_file=__file__, src_line=0)

                event.lineno = doc.lineno
                report.append(event)

            # Check whether the calculation completed.
            if doc.tag == "!FinalSummary":
                run_completed = True

    report.set_run_completed(run_completed)
    return report
def rmtree(self, exclude_wildcard=""):
    """
    Clean up the working directory ``self.workdir``.

    Args:
        exclude_wildcard: Optional string with patterns separated by `|`.
            When empty (default) the whole directory tree, ``self.workdir``
            included, is removed with ``shutil.rmtree``.
            Otherwise the tree is walked and every file whose basename does
            not match one of the patterns is deleted; matching files and all
            the directories are left in place.
            example: exclude_wildcard="*.nc|*.txt" preserves all the files
            whose extension is in ["nc", "txt"].
    """
    if exclude_wildcard:
        keep = WildCard(exclude_wildcard)
        for root, _subdirs, basenames in os.walk(self.workdir):
            for basename in basenames:
                # Files selected by the wildcard must survive the cleanup.
                if keep.match(basename):
                    continue
                os.remove(os.path.join(root, basename))
    else:
        # Nothing to preserve: wipe out the entire tree.
        shutil.rmtree(self.workdir)
def find_exts(top, exts, exclude_dirs=None, include_dirs=None, match_mode="basename"):
    """
    Collect the files whose name ends with one of the strings in `exts` and
    that live in the directory tree rooted at `top` (including top itself,
    but excluding '.' and '..')

    Args:
        top (str): Root directory. If it is a regular file, the file itself
            is tested against `exts`.
        exts (str or list of strings): List of extensions.
        exclude_dirs (str): Wildcards used to exclude particular directories.
            Can be concatenated via `|`
        include_dirs (str): Wildcards used to select particular directories.
            `include_dirs` and `exclude_dirs` are mutually exclusive
        match_mode (str): "basename" if match should be done on the basename.
            "abspath" for absolute path.

    Returns:
        (list of str): Absolute paths of the files.

    Examples::

        # Find all pdf and ps files starting from the current directory.
        find_exts(".", ("pdf", "ps"))

        # Find all pdf files, exclude hidden directories and dirs whose name
        # starts with `_`
        find_exts(".", "pdf", exclude_dirs="_*|.*")

        # Find all ps files, in the directories whose basename starts with
        # output.
        find_exts(".", "ps", include_dirs="output*")
    """
    from monty.string import list_strings

    suffixes = list_strings(exts)

    def has_wanted_ext(name):
        """True if name ends with at least one of the requested extensions."""
        return any(name.endswith(suffix) for suffix in suffixes)

    # `top` is a plain file: the answer contains at most the file itself.
    if os.path.isfile(top):
        if has_wanted_ext(top):
            return [os.path.abspath(top)]
        return []

    # Build shell-style wildcards.
    from monty.fnmatch import WildCard

    if exclude_dirs is not None:
        exclude_dirs = WildCard(exclude_dirs)

    if include_dirs is not None:
        include_dirs = WildCard(include_dirs)

    # Function producing the string the directory wildcards are applied to.
    mangle = dict(basename=os.path.basename, abspath=os.path.abspath)[match_mode]

    # From now on `top` is assumed to be a directory.
    found = []
    for root, _subdirs, basenames in os.walk(top):
        root = os.path.abspath(root)

        # Only the files of a rejected directory are skipped:
        # its sub-directories are still visited by os.walk.
        rejected = (
            (exclude_dirs and exclude_dirs.match(mangle(root)))
            or (include_dirs and not include_dirs.match(mangle(root)))
        )
        if rejected:
            continue

        found.extend(os.path.join(root, name) for name in basenames if has_wanted_ext(name))

    return found
def find_exts(top, exts, exclude_dirs=None, include_dirs=None, match_mode="basename"):
    """
    Collect the files whose name ends with one of the strings in exts and
    that live in the directory tree rooted at top (including top itself,
    but excluding '.' and '..')

    Args:
        top (str): Root directory. If it is a regular file, the file itself
            is tested against `exts`.
        exts (str or list of strings): List of extensions.
        exclude_dirs (str): Wildcards used to exclude particular directories.
            Can be concatenated via `|`
        include_dirs (str): Wildcards used to select particular directories.
            `include_dirs` and `exclude_dirs` are mutually exclusive
            (enforced with assert statements).
        match_mode (str): "basename" if match should be done on the basename.
            "abspath" for absolute path.

    Returns:
        (list of str): Absolute paths of the files.

    Examples::

        # Find all pdf and ps files starting from the current directory.
        find_exts(".", ("pdf", "ps"))

        # Find all pdf files, exclude hidden directories and dirs whose name
        # starts with `_`
        find_exts(".", "pdf", exclude_dirs="_*|.*")

        # Find all ps files, in the directories whose basename starts with
        # output.
        find_exts(".", "ps", include_dirs="output*")
    """
    from monty.string import list_strings

    suffixes = list_strings(exts)

    def has_wanted_ext(name):
        """True if name ends with at least one of the requested extensions."""
        return any(name.endswith(suffix) for suffix in suffixes)

    # `top` is a plain file: the answer contains at most the file itself.
    if os.path.isfile(top):
        if has_wanted_ext(top):
            return [os.path.abspath(top)]
        return []

    # Build shell-style wildcards.
    from monty.fnmatch import WildCard

    if exclude_dirs is not None:
        assert include_dirs is None
        exclude_dirs = WildCard(exclude_dirs)

    if include_dirs is not None:
        assert exclude_dirs is None
        include_dirs = WildCard(include_dirs)

    # Function producing the string the directory wildcards are applied to.
    mangle = dict(basename=os.path.basename, abspath=os.path.abspath)[match_mode]

    # From now on `top` is assumed to be a directory.
    found = []
    for root, _subdirs, basenames in os.walk(top):
        root = os.path.abspath(root)

        # Only the files of a rejected directory are skipped:
        # its sub-directories are still visited by os.walk.
        rejected = (
            (exclude_dirs and exclude_dirs.match(mangle(root)))
            or (include_dirs and not include_dirs.match(mangle(root)))
        )
        if rejected:
            continue

        found.extend(os.path.join(root, name) for name in basenames if has_wanted_ext(name))

    return found
def test_match(self):
    """A "*.pdf" wildcard must accept "A.pdf" and reject "A.pdg"."""
    pdf_only = WildCard("*.pdf")
    accepted = pdf_only.match("A.pdf")
    self.assertTrue(accepted)
    rejected = pdf_only.match("A.pdg")
    self.assertFalse(rejected)