def parse_ion(stream):
    """
    Try to get relevant information from the .ion. For the moment, only the
    element symbol, name, mass and atomic number.

    Raise a ParsingError exception if the file does not contain the element
    symbol or the atomic number. The presence in the file of mass and name
    is, instead, considered optional. If not present, None is returned.

    :param stream: open file-like object with the .ion XML content.
    :return: dict with keys 'element', 'name', 'atomic_number' and 'mass'
        ('name' and 'mass' may be None when absent from the file).
    :raises aiida.common.exceptions.ParsingError: if the element symbol or
        atomic number is missing, or the symbol is not a known element.
    """
    from xml.etree.ElementTree import ElementTree
    from aiida.common.exceptions import ParsingError
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    el_tr = ElementTree(None, stream)
    root = el_tr.getroot()

    # BUG FIX: only 'symbol' and 'z' are mandatory per the docstring; the
    # previous code also required 'label' and crashed with AttributeError on a
    # missing 'mass'. Also fixed the "Currupted" typo in the message.
    if root.find('symbol') is None or root.find('z') is None:
        raise ParsingError(
            f"Corrupted ion file {stream.name}: element symbol or atomic number missing"
        )

    parsed_data["element"] = str(root.find('symbol').text.strip())
    if parsed_data["element"] not in _valid_symbols:
        raise ParsingError(
            f"Unknown element symbol {parsed_data['element']} in file {stream.name}"
        )

    # Optional fields: return None when the corresponding tag is absent.
    label = root.find('label')
    parsed_data["name"] = str(label.text.strip()) if label is not None else None

    parsed_data["atomic_number"] = int(root.find('z').text)

    mass = root.find('mass')
    parsed_data["mass"] = float(mass.text) if mass is not None else None

    return parsed_data
def parse_with_retrieved(self, retreived):
    """Parses the datafolder, stores results.

    This parser for this code ...
    """
    # NOTE(review): the parameter 'retreived' is misspelled and never used;
    # the retrieved folder is fetched from self._calc below — confirm intent.
    from aiida.common.exceptions import InvalidOperation
    from aiida.common import aiidalogger

    # suppose at the start that the job is unsuccessful, unless proven otherwise
    successful = False

    # check whether the yambo calc was an initialisation (p2y)
    try:
        settings_dict = self._calc.inp.settings.get_dict()
        settings_dict = _uppercase_dict(settings_dict, dict_name='settings')
    except AttributeError:
        # no settings node attached to the calculation
        settings_dict = {}

    initialise = settings_dict.pop('INITIALISE', None)

    # select the folder object
    out_folder = self._calc.get_retrieved_node()
    # check what is inside the folder
    list_of_files = out_folder.get_folder_list()

    try:
        input_params = self._calc.inp.parameters.get_dict()
    except AttributeError:
        # an initialisation run has no input parameters; otherwise this is fatal
        if not initialise:
            raise ParsingError("Input parameters not found!")
        else:
            input_params = {}
    # retrieve the cell: if parent_calc is a YamboCalculation we must find the original PwCalculation
    # going back through the graph tree.
    parent_calc = self._calc.inp.parent_calc_folder.inp.remote_folder
    cell = {}
    if isinstance(parent_calc, YamboCalculation):
        # walk up the chain of parent calculations until one carries a structure
        has_found_cell = False
        while (not has_found_cell):
            try:
                cell = parent_calc.inp.structure.cell
                has_found_cell = True
            except AttributeError:
                parent_calc = parent_calc.inp.parent_calc_folder.inp.remote_folder
    elif isinstance(parent_calc, PwCalculation):
        cell = self._calc.inp.parent_calc_folder.inp.remote_folder.inp.structure.cell

    # accumulators for the parsing results
    output_params = {'warnings': [], 'errors': [], 'yambo_wrote': False}
    new_nodes_list = []
    ndbqp = {}
    ndbhf = {}

    try:
        results = YamboFolder(out_folder.get_abs_path())
    except Exception, e:
        # NOTE(review): Python 2 'except ..., e' syntax — this file will not
        # run on Python 3. Also 'success' is assigned here while 'successful'
        # was initialised above; confirm which flag is intended.
        success = False
        raise ParsingError("Unexpected behavior of YamboFolder: %s" % e)
def _parse_first_line(line, fname):
    """
    Parse the first line of the basis set.

    :param line: the line string
    :param fname: the filename string
    :return: (atomic_number, basis_type, num_shells, newline)
    """
    from aiida.common.exceptions import ParsingError

    # The first line must consist of exactly two whitespace-separated fields:
    # the (possibly shifted) atomic number and the number of shells.
    fields = line.strip().split()
    if len(fields) != 2:
        raise ParsingError(
            "The first line should contain only two fields: '{}' for file {}".
            format(line, fname))

    anumber_str, num_shells_str = fields

    if not anumber_str.isdigit():
        raise ParsingError(
            "The first field should be the atomic number '{}' for file {}".
            format(line, fname))

    anumber = int(anumber_str)
    atomic_number = None
    basis_type = None

    if anumber < 99:
        # plain atomic number -> all-electron basis
        atomic_number = anumber
        basis_type = "all-electron"
    elif 200 < anumber < 999:
        raise NotImplementedError(
            "valence electron basis sets not currently supported")
        # TODO support valence electron basis sets not currently supported (ECP must also be defined)
        # atomic_number = anumber % 100
        # basis_type = "valence-electron"
    elif anumber > 1000:
        # numbers above 1000 encode the atomic number in the last two digits
        atomic_number = anumber % 100
        basis_type = "all-electron"

    if atomic_number is None:
        raise ParsingError("Illegal atomic number {} for file {}".format(
            anumber, fname))

    if not num_shells_str.isdigit():
        raise ParsingError(
            "The second field should be the number of shells {} for file {}".
            format(line, fname))
    num_shells = int(num_shells_str)

    # we would deal with different numbering at .d12 creation time
    shifted = (atomic_number
               if basis_type == "all-electron" else 200 + atomic_number)
    newline = "{0} {1}\n".format(shifted, num_shells)

    return atomic_number, basis_type, num_shells, newline
def get_cutoffs_from_djrepo(cls, djrepo, pseudo_type):
    """Collect and organize the suggested cutoffs (hints) from a DJREPO file.

    DJREPO files only provide a kinetic energy cutoff; a dual multiplies it to
    obtain the charge density (rho) cutoff. Norm-conserving pseudo types from
    PseudoDojo (UpfData, Psp8Data, PsmlData) use a dual of 4.0, while PAW
    potentials (assumed to be JthXmlData) use a dual of 2.0.

    :param djrepo: dictionary loaded from DJREPO JSON file
    :param pseudo_type: pseudopotential data class used to pick the dual
    :returns: cutoffs dictionary where keys are stringency levels and values
        are {'cutoff_wfc': ..., 'cutoff_rho': ...}
    :raises ValueError: if no dual is known for ``pseudo_type``
    :raises ParsingError: if the djrepo lacks ``hints`` or a stringency entry
    """
    duals = {UpfData: 4.0, Psp8Data: 4.0, PsmlData: 4.0, JthXmlData: 2.0}

    try:
        dual = duals[pseudo_type]
    except KeyError as exc:
        raise ValueError(
            f'cannot get cutoffs for pseudo type `{pseudo_type}` because the appropriate dual '
            'for generating density cutoffs is unknown') from exc

    try:
        hints = djrepo['hints']
    except KeyError as exc:
        raise ParsingError(
            'key `hints` is not defined in the djrepo.') from exc

    cutoffs = {}

    for level in ('low', 'normal', 'high'):
        try:
            wfc_cutoff = hints.get(level, {})['ecut']
        except KeyError as exc:
            raise ParsingError(
                f'stringency `{level}` is not defined in the djrepo `hints`'
            ) from exc
        cutoffs[level] = {
            'cutoff_wfc': wfc_cutoff,
            'cutoff_rho': wfc_cutoff * dual
        }

    return cutoffs
def set_file(self, file):
    """Store the file in the repository and parse it to set the `element` and `md5` attributes.

    :param file: filepath or filelike object of the UPF potential file to store.
    """
    # pylint: disable=redefined-builtin
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_file, md5_from_filelike

    parsed_data = parse_upf(file)

    # md5_file expects a path; a filelike object raises TypeError there.
    try:
        checksum = md5_file(file)
    except TypeError:
        checksum = md5_from_filelike(file)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError(
            "No 'element' parsed in the UPF file {}; unable to store".
            format(self.filename))

    super(UpfData, self).set_file(file)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)
def store(self, *args, **kwargs):
    """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
    from aiida.common.exceptions import ParsingError, ValidationError
    import aiida.common.utils
    from aiida.common.files import md5_from_filelike

    # Nothing to redo for an already-stored node.
    if self.is_stored:
        return self

    # Re-parse the file to recover the element symbol.
    with self.open(mode='r') as handle:
        parsed_data = parse_psml(handle.name)

    # Open in binary mode which is required for generating the md5 checksum
    with self.open(mode='rb') as handle:
        checksum = md5_from_filelike(handle)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSML file {};"
                           " unable to store".format(self.filename))

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)

    return super(PsmlData, self).store(*args, **kwargs)
def set_file(self, file, filename=None):
    """Store the file in the repository and parse it to set the `element` and `md5` attributes.

    :param file: filepath or filelike object of the UPF potential file to store.
        Hint: Pass io.BytesIO(b"my string") to construct the file directly from a string.
    :param filename: specify filename to use (defaults to name of provided file).
    """
    # pylint: disable=redefined-builtin
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_file, md5_from_filelike

    parsed_data = parse_upf(file)

    # md5_file expects a path; a filelike object raises TypeError there.
    try:
        checksum = md5_file(file)
    except TypeError:
        checksum = md5_from_filelike(file)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError(
            "No 'element' parsed in the UPF file {}; unable to store".
            format(self.filename))

    super(UpfData, self).set_file(file, filename=filename)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)
def store(self, **kwargs):  #pylint: disable=arguments-differ
    """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_from_filelike

    # Nothing to redo for an already-stored node.
    if self.is_stored:
        return self

    # Re-parse the file to recover the element symbol.
    with self.open(mode='r') as handle:
        parsed_data = parse_psf(handle)

    # Open in binary mode which is required for generating the md5 checksum
    with self.open(mode='rb') as handle:
        checksum = md5_from_filelike(handle)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSF file {};"
                           " unable to store".format(self.filename))

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)

    return super().store(**kwargs)
def store(self, *args, **kwargs):
    """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
    from aiida.common.exceptions import ParsingError, ValidationError
    import aiida.common.utils

    psf_abspath = self.get_file_abs_path()
    if not psf_abspath:
        raise ValidationError("No valid PSF was passed!")

    # Re-derive element and checksum from the file on disk.
    parsed_data = parse_psf(psf_abspath)
    checksum = aiida.common.utils.md5_file(psf_abspath)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSF file {};"
                           " unable to store".format(self.filename))

    self._set_attr('element', str(element))
    self._set_attr('md5', checksum)

    return super(PsfData, self).store(*args, **kwargs)
def get_md5_from_djrepo(cls, djrepo, pseudo_type):
    """Get the appropriate md5 hash from a DJREPO file.

    :param djrepo: dictionary loaded from DJREPO JSON file.
    :param pseudo_type: the pseudopotential data class, used to select the md5 key.
    :returns: md5 string.
    :raises ValueError: if ``pseudo_type`` is not supported by PseudoDojo djrepos.
    :raises ParsingError: if the expected md5 key is missing from the djrepo.
    """
    md5_key_mapping = {
        UpfData: 'md5_upf',
        Psp8Data: 'md5',
        PsmlData: 'md5_psml',
        JthXmlData: 'md5'
    }

    try:
        md5_key = md5_key_mapping[pseudo_type]
    except KeyError as exception:
        raise ValueError(
            f'pseudo type `{pseudo_type}` is unsupported by PseudoDojo djrepos: {exception}'
        ) from exception

    try:
        md5 = djrepo[md5_key]
    except KeyError as exception:
        # BUG FIX: the message previously interpolated `cls.md5_key`, an
        # attribute that does not exist, so a missing key raised
        # AttributeError instead of the intended ParsingError.
        raise ParsingError(
            f'key `{md5_key}` is not defined in the djrepo: {exception}'
        ) from exception

    return md5
def store(self, *args, **kwargs):
    """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
    # pylint: disable=arguments-differ
    from aiida.common.exceptions import ParsingError
    from aiida.common.files import md5_from_filelike

    # Nothing to redo for an already-stored node.
    if self.is_stored:
        return self

    # Re-parse the file to recover the element symbol.
    with self.open(mode='r') as handle:
        parsed_data = parse_upf(handle)

    # Open in binary mode which is required for generating the md5 checksum
    with self.open(mode='rb') as handle:
        checksum = md5_from_filelike(handle)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError(
            'Could not parse the element from the UPF file {}'.format(
                self.filename))

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)

    return super(UpfData, self).store(*args, **kwargs)
def parse_psml(fname, check_filename=True):
    """
    Try to get relevant information from the PSML: the element label, the
    atomic number and the pseudo charge.

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os
    from aiida.common.exceptions import ParsingError
    # from aiida.common import AIIDA_LOGGER
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.nodes.data.structure import _valid_symbols
    from xml.dom import minidom

    parsed_data = {}

    dom = minidom.parse(fname)

    # The element metadata lives on the first <pseudo-atom-spec> node.
    spec = dom.getElementsByTagName('pseudo-atom-spec')[0]
    element = spec.attributes['atomic-label'].value
    atomic_number = spec.attributes['atomic-number'].value
    z_pseudo = spec.attributes['z-pseudo'].value

    # Only first letter capitalized!
    if element is None:
        raise ParsingError(
            "Unable to find the element of PSML {}".format(fname))
    element = element.capitalize()

    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(
            element, fname))

    if check_filename:
        basename = os.path.basename(fname).lower()
        if not basename.startswith(element.lower()):
            raise ParsingError("Filename {0} was recognized for element "
                               "{1}, but the filename does not start "
                               "with {1}".format(fname, element))

    parsed_data['element'] = element
    parsed_data['atomic-number'] = atomic_number
    parsed_data['z-pseudo'] = z_pseudo

    return parsed_data
def parse_psf(fname, check_filename=True):
    """
    Try to get relevant information from the PSF. For the moment, only the
    element name.

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os
    from aiida.common.exceptions import ParsingError
    # from aiida.common import AIIDA_LOGGER
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    # Accept either a filelike object or a path on disk.
    try:
        psf_contents = fname.read().split()
        fname = fname.name
    except AttributeError:
        with io.open(fname, encoding='utf8') as fil:
            psf_contents = fil.read().split()

    # The element symbol is the very first whitespace-separated token.
    element = psf_contents[0] if psf_contents else None

    if element is None:
        raise ParsingError("Unable to find the element of PSF {}".format(fname))

    # Only first letter capitalized!
    element = element.capitalize()

    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(element, fname))

    if check_filename:
        if not os.path.basename(fname).lower().startswith(element.lower()):
            raise ParsingError(
                "Filename {0} was recognized for element "
                "{1}, but the filename does not start "
                "with {1}".format(fname, element)
            )

    parsed_data['element'] = element

    return parsed_data
def parse_psf(fname, check_filename=True):
    """
    Try to get relevant information from the PSF. For the moment, only the
    element name.

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os
    from aiida.common.exceptions import ParsingError
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.data.structure import _valid_symbols

    parsed_data = {}

    # The element symbol is the first whitespace-separated token of the file.
    with open(fname) as handle:
        tokens = handle.read().split()
        element = next(iter(tokens), None)

    # Only first letter capitalized!
    if element is None:
        raise ParsingError(
            "Unable to find the element of PSF {}".format(fname))
    element = element.capitalize()

    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(
            element, fname))

    if check_filename:
        if not os.path.basename(fname).lower().startswith(element.lower()):
            raise ParsingError("Filename {0} was recognized for element "
                               "{1}, but the filename does not start "
                               "with {1}".format(fname, element))

    parsed_data['element'] = element

    return parsed_data
def __init__(self, pwinput): """ Parse inputs's namelist and cards to create attributes of the info. :param pwinput: Any one of the following * A string of the (existing) absolute path to the pwinput file. * A single string containing the pwinput file's text. * A list of strings, with the lines of the file as the elements. * A file object. (It will be opened, if it isn't already.) :raises IOError: if ``pwinput`` is a file and there is a problem reading the file. :raises TypeError: if ``pwinput`` is a list containing any non-string element(s). :raises aiida.common.exceptions.ParsingError: if there are issues parsing the pwinput. """ # Get the text of the pwinput file as a single string. # File. if isinstance(pwinput, file): try: self.input_txt = pwinput.read() except IOError: raise IOError('Unable to open the provided pwinput, {}' ''.format(file.name)) # List. elif isinstance(pwinput, list): if all((issubclass(type(s), basestring) for s in pwinput)): self.input_txt = ''.join(pwinput) else: raise TypeError( 'You provided a list to parse, but some elements were not ' 'strings. Each element should be a string containing a line' 'of the pwinput file.') # Path or string of the text. elif issubclass(type(pwinput), basestring): if os.path.isfile(pwinput): if os.path.exists(pwinput) and os.path.isabs(pwinput): self.input_txt = open(pwinput).read() else: raise IOError( 'Please provide the absolute path to an existing ' 'pwinput file.') else: self.input_txt = pwinput # Check that pwinput is not empty. if len(self.input_txt.strip()) == 0: raise ParsingError('The pwinput provided was empty!')
def str01_to_bool(s):
    """
    Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

    While this is opposite to the QE standard, this mapping is what needs to
    be passed to aiida in a 'settings' ParameterData object.
    (See the _if_pos method of BasePwCpInputGenerator)
    """
    # Note the deliberate inversion: '0' means "fixed" for AiiDA settings.
    result = {'0': True, '1': False}.get(s)
    if result is None:
        raise ParsingError(
            'Unable to convert if_pos = {} to bool'.format(s))
    return result
def set_file(self, file, filename=None):
    """
    Pre-parse the PSML file to store the `element` and `md5` attributes.
    """
    from aiida.common.exceptions import ParsingError

    parsed_data = parse_psml(file)
    checksum = md5_file(file)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError(
            "No 'element' parsed in the PSML file: unable to store")

    super(PsmlData, self).set_file(file)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)
def set_file(self, filename):
    """
    Pre-parse the PSF file to store the `element` and `md5` attributes.
    """
    from aiida.common.exceptions import ParsingError
    import aiida.common.utils

    parsed_data = parse_psf(filename)
    checksum = aiida.common.utils.md5_file(filename)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSF file {};"
                           " unable to store".format(self.filename))

    super(PsfData, self).set_file(filename)

    self._set_attr('element', str(element))
    self._set_attr('md5', checksum)
def set_file(self, filename):
    """
    Pre-parse the PSML file to store the `element` and `md5` attributes.
    """
    from aiida.common.exceptions import ParsingError
    import aiida.common.utils

    parsed_data = parse_psml(filename)
    checksum = md5_file(filename)

    try:
        element = parsed_data['element']
    except KeyError:
        raise ParsingError("No 'element' parsed in the PSML file {};"
                           " unable to store".format(self.filename))

    super(PsmlData, self).set_file(filename)

    self.set_attribute('element', str(element))
    self.set_attribute('md5', checksum)
def parse_namelists(txt):
    """
    Parse txt to extract a dictionary of the namelist info.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A nested dictionary of the namelists and their key-value pairs. The
        namelists will always be upper-case keys, while the parameter keys
        will always be lower-case.

        For example ::

            {"CONTROL": {"calculation": "bands",
                         "prefix": "al",
                         "pseudo_dir": "./pseudo",
                         "outdir": "./out"},
             "ELECTRONS": {"diagonalization": "cg"},
             "SYSTEM": {"nbnd": 8,
                        "ecutwfc": 15.0,
                        "celldm(1)": 7.5,
                        "ibrav": 2,
                        "nat": 1,
                        "ntyp": 1}
            }

    :rtype: dictionary

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # TODO: Incorporate support for algebraic expressions?
    # Define the re to match a namelist and extract the info from it.
    namelist_re = re.compile(
        r"""
        ^ [ \t]* &(\S+) [ \t]* $\n  # match line w/ nmlst tag; save nmlst name
        (
         [\S\s]*?                   # match any line non-greedily
        )                           # save the group of text between nmlst
        ^ [ \t]* / [ \t]* $\n       # match line w/ "/" as only non-whitespace char
        """, re.M | re.X)
    # Define the re to match and extract all of the key = val pairs inside
    # a block of namelist text.
    key_value_re = re.compile(
        r"""
        [ \t]* (\S+?) [ \t]*  # match and store key
        =               # equals sign separates key and value
        [ \t]* (\S+?) [ \t]*  # match and store value
        [\n,]           # return or comma separates "key = value" pairs
        """, re.M | re.X)

    # Scan through the namelists, turning each block of key = value pairs
    # into its own lower-cased dictionary, keyed by the upper-cased namelist.
    params_dict = {}
    for nmlst, blockstr in namelist_re.findall(txt):
        nmlst_dict = {
            key.lower(): str2val(valstr)
            for key, valstr in key_value_re.findall(blockstr)
        }
        if nmlst_dict:
            params_dict[nmlst.upper()] = nmlst_dict

    if not params_dict:
        raise ParsingError(
            'No data was found while parsing the namelist in the following '
            'text\n' + txt)
    return params_dict
def parse_upf(fname, check_filename=True):
    """
    Try to get relevant information from the UPF. For the moment, only the
    element name.

    Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os
    from aiida.common.exceptions import ParsingError
    from aiida.common import aiidalogger
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.data.structure import _valid_symbols

    parsed_data = {}

    with open(fname) as handle:
        # The first line carries (at most) the UPF version string.
        first_line = handle.readline().strip()
        match = _upfversion_regexp.match(first_line)
        if match:
            version = match.group('version')
            aiidalogger.debug("Version found: {} for file {}".format(
                version, fname))
        else:
            aiidalogger.debug("Assuming version 1 for file {}".format(fname))
            version = "1"

        parsed_data['version'] = version
        try:
            version_major = int(version.partition('.')[0])
        except ValueError:
            # Unparseable major version: fall back to version 1.
            aiidalogger.debug("Falling back to version 1 for file {}, "
                              "version string '{}' unrecognized".format(
                                  fname, version))
            version_major = 1

        # v1 and v>=2 store the element under different tags; pick the right
        # regexp and scan the remaining lines for the first match.
        element_regexp = (_element_v1_regexp
                          if version_major == 1 else _element_v2_regexp)
        element = None
        for line in handle:
            match = element_regexp.match(line.strip())
            if match:
                element = match.group('element_name')
                break

    if element is None:
        raise ParsingError(
            "Unable to find the element of UPF {}".format(fname))
    element = element.capitalize()

    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(
            element, fname))

    if check_filename:
        if not os.path.basename(fname).lower().startswith(element.lower()):
            raise ParsingError("Filename {0} was recognized for element "
                               "{1}, but the filename does not start "
                               "with {1}".format(fname, element))

    parsed_data['element'] = element

    return parsed_data
def parse_djrepos_from_folder(cls, dirpath, pseudo_type):  # pylint: disable=too-many-locals,too-many-branches
    """Parse the djrepo files in the given directory into md5 and cutoff mappings.

    .. note:: The directory pointed to by `dirpath` should only contain djrepo files. Optionally, it can contain
        just a single directory, that contains all the djrepo files. If any other files are stored in the basepath
        or the subdirectory that cannot be successfully parsed as djrepo files the method will raise a `ValueError`.

    :param dirpath: absolute path to a directory containing djrepos.
    :param pseudo_type: pseudopotential data class, forwarded to the md5/cutoff extraction helpers.
    :return: tuple of the md5 mapping (element -> md5) and the cutoffs mapping (stringency -> element -> cutoffs).
    :raises ValueError: if `dirpath` is not a directory or contains anything other than files.
    :raises ValueError: if `dirpath` contains multiple djrepos for the same element.
    :raises ParsingError: if parsing any of the files in `dirpath` fails.
    """
    md5s = {}
    cutoffs = {'low': {}, 'normal': {}, 'high': {}}
    elements = []

    if not os.path.isdir(dirpath):
        raise ValueError(f'`{dirpath}` is not a directory')

    # If the directory contains exactly one subdirectory, descend into it.
    dirpath_contents = os.listdir(dirpath)
    if len(dirpath_contents) == 1 and os.path.isdir(os.path.join(dirpath, dirpath_contents[0])):
        dirpath = os.path.join(dirpath, dirpath_contents[0])

    for filename in os.listdir(dirpath):
        filepath = os.path.join(dirpath, filename)

        if not os.path.isfile(filepath):
            raise ValueError(f'dirpath `{dirpath}` contains at least one entry that is not a file')

        # Some of the djrepo archives contain extraneous files. Here we skip files with unsupported extensions.
        if filename.split('.')[-1] not in cls._pseudo_repo_file_extensions:
            # BUG FIX: the f-strings in this function had lost their
            # placeholders; restored filename/filepath interpolation.
            warnings.warn(f'filename {filename} does not have a supported extension. Skipping...')
            continue

        try:
            with open(filepath, 'r') as handle:
                djrepo = json.load(handle)
        except ParsingError as exception:
            # NOTE(review): json.load raises json.JSONDecodeError (a
            # ValueError), not ParsingError — confirm the intended exception.
            raise ParsingError(f'failed to parse `{filepath}`: {exception}') from exception
        else:
            match = re.search(r'^([A-Za-z]{1,2})\.\w+', filename)
            if match is None:
                raise ParsingError(
                    f'could not parse a valid element symbol from the filename `{filename}`. '
                    'It should have the format `ELEMENT.EXTENSION`'
                )
            element = match.group(1)
            if element in elements:
                raise ValueError(f'directory `{dirpath}` contains djrepos with duplicate elements`')

            try:
                md5 = cls.get_md5_from_djrepo(djrepo, pseudo_type=pseudo_type)
            except (ParsingError, ValueError) as exception:
                raise ParsingError(f'failed to parse md5 from djrepo file `{filepath}`: {exception}') from exception
            else:
                md5s[element] = md5

            try:
                djrepo_cutoffs = cls.get_cutoffs_from_djrepo(djrepo, pseudo_type=pseudo_type)
            except ParsingError as exception:
                raise ParsingError(
                    f'failed to parse cutoffs from djrepo file `{filepath}`: {exception}'
                ) from exception
            else:
                for stringency in ['low', 'normal', 'high']:
                    cutoffs[stringency][element] = djrepo_cutoffs[stringency]

            elements.append(element)

    if (not cutoffs['low']) and (not cutoffs['normal']) and (not cutoffs['high']):
        raise ValueError(f'no djrepos were parsed from `{dirpath}`')

    return md5s, cutoffs
def parse_upf(fname, check_filename=True):
    """
    Try to get relevant information from the UPF. For the moment, only the
    element name.

    Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os
    from aiida.common.exceptions import ParsingError
    from aiida.common import AIIDA_LOGGER
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    # Accept either a filelike object or a path on disk.
    try:
        upf_contents = fname.read()
        fname = fname.name
    except AttributeError:
        with io.open(fname, encoding='utf8') as handle:
            upf_contents = handle.read()

    match = REGEX_UPF_VERSION.search(upf_contents)
    if match:
        version = match.group('version')
        AIIDA_LOGGER.debug('Version found: {} for file {}'.format(
            version, fname))
    else:
        AIIDA_LOGGER.debug('Assuming version 1 for file {}'.format(fname))
        version = '1'

    parsed_data['version'] = version
    try:
        version_major = int(version.partition('.')[0])
    except ValueError:
        # Unparseable major version: fall back to version 1.
        AIIDA_LOGGER.debug('Falling back to version 1 for file {}, '
                           "version string '{}' unrecognized".format(
                               fname, version))
        version_major = 1

    # v1 and v>=2 store the element under different tags; pick the matching
    # regular expression and search the whole contents.
    element_regex = REGEX_ELEMENT_V1 if version_major == 1 else REGEX_ELEMENT_V2
    match = element_regex.search(upf_contents)
    element = match.group('element_name') if match else None

    if element is None:
        raise ParsingError(
            'Unable to find the element of UPF {}'.format(fname))
    element = element.capitalize()

    if element not in _valid_symbols:
        raise ParsingError('Unknown element symbol {} for file {}'.format(
            element, fname))

    if check_filename:
        if not os.path.basename(fname).lower().startswith(element.lower()):
            raise ParsingError('Filename {0} was recognized for element '
                               '{1}, but the filename does not start '
                               'with {1}'.format(fname, element))

    parsed_data['element'] = element

    return parsed_data
def parse_cell_parameters(txt):
    """
    Return dict containing info from the CELL_PARAMETERS card block in txt.

    .. note:: This card is only needed if ibrav = 0. Therefore, if the card is
        not present, the function will return None and not raise an error.

    .. note:: If the units are unspecified, they will be returned as None. The
        units interpreted by QE depend on whether or not one of 'celldm(1)'
        or 'a' is set in &SYSTEM.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary (if CELL_PARAMETERS is present; else: None) with

            * units: the units of the lattice vectors (always lower-case) or
              None
            * cell: 3x3 list with lattice vectors as rows

        For example ::

            {'units': 'angstrom',
             'cell': [[16.9, 0.0, 0.0],
                      [-2.6, 8.0, 0.0],
                      [-2.6, -3.5, 7.2]]}

    :rtype: dict or None

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the card block.
    cell_parameters_block_re = re.compile(
        r"""
        ^ [ \t]* CELL_PARAMETERS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         ){3}
        )
        """, RE_FLAGS)
    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block and extract units and the lines of the block.
    match = cell_parameters_block_re.search(txt)
    if not match:
        return None
    # Use specified units or None if not specified.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    if match.group('block') is None:
        raise ParsingError(
            'The CELL_PARAMETER card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')
    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Now, extract the lattice vectors.
    lattice_vectors = []
    for match in atomic_species_re.finditer(blockstr):
        # BUG FIX: wrap map() in list() — on Python 3, map returns a lazy
        # iterator, so 'cell' would hold map objects rather than the
        # documented 3x3 list of floats.
        lattice_vectors.append(list(map(fortfloat, match.groups())))
    info_dict = dict(units=units, cell=lattice_vectors)
    return info_dict
def parse_atomic_positions(txt):
    """
    Return a dictionary containing info from the ATOMIC_POSITIONS card block
    in txt.

    .. note:: If the units are unspecified, they will be returned as None.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * units: the units of the positions (always lower-case) or None
            * names: list of the atom names (e.g. ``'Si'``, ``'Si0'``,
              ``'Si_0'``)
            * positions: list of the [x, y, z] positions
            * fixed_coords: list of [x, y, z] (bools) of the force
              modifications (**Note:** True <--> Fixed, as defined in the
              ``BasePwCpInputGenerator._if_pos`` method)

        For example ::

            {'units': 'bohr',
             'names': ['C', 'O'],
             'positions': [[0.0, 0.0, 0.0],
                           [0.0, 0.0, 2.5]]
             'fixed_coords': [[False, False, False],
                              [True, True, True]]}

    :rtype: dictionary

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the card block.
    # NOTE: This will match card block lines w/ or w/out force modifications.
    atomic_positions_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_POSITIONS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]*
          (?:
           \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+
           (?:[ \t]+ [{(]? [ \t]* [01] [ \t]+ [01] [ \t]+ [01] [ \t]* [)}]?)?
          )
          [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for atomic positions without force modifications.
    atomic_positions_re = re.compile(
        r"""
        ^ [ \t]*
        (?P<name>\S+) [ \t]+ (?P<x>\S+) [ \t]+ (?P<y>\S+) [ \t]+ (?P<z>\S+)
        [ \t]* $\n?
        """, RE_FLAGS)
    # Define re for atomic positions with force modifications.
    atomic_positions_constraints_re = re.compile(
        r"""
        ^ [ \t]*
        (?P<name>\S+) [ \t]+ (?P<x>\S+) [ \t]+ (?P<y>\S+) [ \t]+ (?P<z>\S+)
        [ \t]+ [{(]? [ \t]* (?P<if_pos1>[01]) [ \t]+ (?P<if_pos2>[01])
        [ \t]+ (?P<if_pos3>[01]) [ \t]* [)}]?
        [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block and extract units and the lines of the block.
    match = atomic_positions_block_re.search(txt)
    if not match:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was not found in\n' + txt)
    # Get the units. If they are not found, match.group('units') will be None.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    if match.group('block') is None:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')

    # Define a small helper function to convert if_pos strings to bools that
    # correspond to the mapping of BasePwCpInputGenerator._if_pos method.
    def str01_to_bool(s):
        """
        Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs
        to be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        if s == '0':
            return True
        elif s == '1':
            return False
        else:
            raise ParsingError(
                'Unable to convert if_pos = {} to bool'.format(s))

    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Parse the lines of the card block, extracting an atom name, position
    # and fixed coordinates.
    names, positions, fixed_coords = [], [], []
    # First, try using the re for lines without force modifications. Set the
    # default force modification to the default (True) for each atom.
    # BUG FIX (applies to all three appends below): wrap map() in list() — on
    # Python 3 map returns a lazy iterator, so 'positions'/'fixed_coords'
    # would hold map objects rather than the documented lists.
    for match in atomic_positions_re.finditer(blockstr):
        names.append(match.group('name'))
        positions.append(list(map(fortfloat, match.group('x', 'y', 'z'))))
        fixed_coords.append(3 * [False])  # False <--> not fixed (the default)
    # Next, try using the re for lines with force modifications.
    for match in atomic_positions_constraints_re.finditer(blockstr):
        names.append(match.group('name'))
        positions.append(list(map(fortfloat, match.group('x', 'y', 'z'))))
        if_pos123 = match.group('if_pos1', 'if_pos2', 'if_pos3')
        fixed_coords.append(list(map(str01_to_bool, if_pos123)))

    # Check that the number of atomic positions parsed is equal to the number
    # of lines in blockstr
    n_lines = len(blockstr.rstrip().split('\n'))
    if len(names) != n_lines:
        raise ParsingError(
            'Only {} atomic positions were parsed from the {} lines of the '
            'ATOMIC_POSITIONS card block:\n{}'.format(len(names), n_lines,
                                                      blockstr))
    info_dict = dict(units=units, names=names, positions=positions,
                     fixed_coords=fixed_coords)
    return info_dict
def parse_basis(fname):
    """Get relevant information from the basis file.

    :param fname: the file path
    :return: (metadata_dict, content_str)

    - The basis file must contain one basis set in the CRYSTAL17 format
    - blank lines and lines beginning '#' will be ignored
    - the file can also start with a fenced (with ---), yaml formatted header section
    - Note keys should not contain '.'s

    :raises aiida.common.exceptions.ParsingError:
        if the yaml header is not a mapping, if it uses a reserved key,
        or if the file contains no basis-set content

    Example ::

        # an ignored comment
        ---
        author: J Smith
        year: 1999
        ---
        8 2
        1 0 3  2.  0.
        1 1 3  6.  0.

    """
    from aiida.common.exceptions import ParsingError
    meta_data = {}
    in_yaml = False
    yaml_lines = []
    # keys reserved for data parsed from the basis content itself
    protected_keys = [
        "atomic_number", "num_shells", "element", "basis_type", "content"
    ]
    parsing_data = False
    content = []
    with open(fname) as f:
        for line in f:
            # ignore commented and blank lines
            if line.strip().startswith("#") or not line.strip():
                continue
            if line.strip() == "---" and not parsing_data:
                if not in_yaml:
                    # opening fence of the yaml header
                    in_yaml = True
                    continue
                else:
                    # closing fence: parse the collected header lines.
                    # safe_load, not load: the file is external input and
                    # must not be able to construct arbitrary objects.
                    head_data = yaml.safe_load("".join(yaml_lines))
                    head_data = {} if not head_data else head_data
                    if not isinstance(head_data, dict):
                        raise ParsingError(
                            "the header data could not be read for file: {}".
                            format(fname))
                    clashing_keys = set(
                        head_data.keys()).intersection(protected_keys)
                    if clashing_keys:
                        # report the keys that actually clash, not the
                        # whole reserved list
                        raise ParsingError(
                            "the header data contained a forbidden key(s) {} for file: {}"
                            .format(sorted(clashing_keys), fname))
                    meta_data = head_data
                    in_yaml = False
                    parsing_data = True
                    continue
            if in_yaml:
                yaml_lines.append(line)
                continue
            parsing_data = True
            if not content:
                # the first content line carries the atomic number,
                # basis type and number of shells
                atomic_number, basis_type, num_shells, line = _parse_first_line(
                    line, fname)
                meta_data["atomic_number"] = atomic_number
                meta_data["element"] = ATOMIC_NUM2SYMBOL[atomic_number]
                meta_data["basis_type"] = basis_type
                meta_data["num_shells"] = num_shells
            content.append(line)
    if not content:
        raise ParsingError(
            "The basis set file contains no content: {}".format(fname))
    validate_basis_string("".join(content))
    return meta_data, "".join(content)
def parse_k_points(txt):
    """
    Return a dictionary containing info from the K_POINTS card block in txt.

    .. note:: If the type of kpoints (where type = x in the card header,
           "K_POINTS x") is not present, type will be returned as 'tpiba', the
           QE default.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary containing

            * type: the type of kpoints (always lower-case)
            * points: an Nx3 list of the kpoints (will not be present if type =
              'gamma' or type = 'automatic')
            * weights: a 1xN list of the kpoint weights (will not be present if
              type = 'gamma' or type = 'automatic')
            * mesh: a 1x3 list of the number of equally-spaced points in each
              direction of the Brillouin zone, as in Monkhorst-Pack grids (only
              present if type = 'automatic')
            * offset: a 1x3 list of the grid offsets in each direction of the
              Brillouin zone (only present if type = 'automatic')
              (**Note:** The offset value for each direction will be *one of*
              ``0.0`` [no offset] *or* ``0.5`` [offset by half a grid step].
              This differs from the Quantum Espresso convention, where an offset
              value of ``1`` corresponds to a half-grid-step offset, but adheres
              to the current AiiDa convention.

        Examples: ::

            {'type': 'crystal',
             'points': [[0.125, 0.125, 0.0],
                        [0.125, 0.375, 0.0],
                        [0.375, 0.375, 0.0]],
             'weights': [1.0, 2.0, 1.0]}

            {'type': 'automatic',
             'points': [8, 8, 8],
             'offset': [0.0, 0.5, 0.0]}

            {'type': 'gamma'}

    :rtype: dictionary

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the special-type card block.
    k_points_special_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]*
            [{(]? [ \t]* (?P<type>\S+?)? [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* \S+ [ \t]* $\n  # nks
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the special-type block.
    k_points_special_re = re.compile(
        r"""
    ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
    """, RE_FLAGS)
    # Define re for the automatic-type card block and its line of info.
    k_points_automatic_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* automatic [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+)
            [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)
    # Define re for the gamma-type card block. (There is no block info.)
    k_points_gamma_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* gamma [ \t]* [)}]? [ \t]* $\n
        """, RE_FLAGS)
    # Try finding the card block using all three types.
    info_dict = {}
    match = k_points_special_block_re.search(txt)
    if match:
        if match.group('type') is not None:
            info_dict['type'] = match.group('type').lower()
        else:
            # QE default when the type is omitted
            info_dict['type'] = 'tpiba'
        blockstr = match.group('block')
        points = []
        weights = []
        for match in k_points_special_re.finditer(blockstr):
            # materialize as a list: on Python 3, a bare map() would store
            # a one-shot iterator instead of the documented Nx3 list
            points.append(list(map(float, match.group(1, 2, 3))))
            weights.append(float(match.group(4)))
        info_dict['points'] = points
        info_dict['weights'] = weights
    else:
        match = k_points_automatic_block_re.search(txt)
        if match:
            info_dict['type'] = 'automatic'
            # list(), not a lazy map: callers index/iterate this repeatedly
            info_dict['points'] = list(map(int, match.group(1, 2, 3)))
            # QE flags a half-step offset with 1; convert to the AiiDA
            # convention of 0.5
            info_dict['offset'] = [
                0. if x == 0 else 0.5
                for x in map(int, match.group(4, 5, 6))
            ]
        else:
            match = k_points_gamma_block_re.search(txt)
            if match:
                info_dict['type'] = 'gamma'
            else:
                raise ParsingError('K_POINTS card not found in\n' + txt)
    return info_dict
def parse_atomic_species(txt):
    """
    Return a dictionary containing info from the ATOMIC_SPECIES card block
    in txt.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * names: list of the atom names (e.g. 'Si', 'Si0', 'Si_0') (case
              as-is)
            * masses: list of the masses of the atoms in 'names'
            * pseudo_file_names: list of the pseudopotential file names for the
              atoms in 'names' (case as-is)

        Example: ::

            {'names': ['Li', 'O', 'Al', 'Si'],
             'masses': [6.941, 15.9994, 26.98154, 28.0855],
             'pseudo_file_names': ['Li.pbe-sl-rrkjus_psl.1.0.0.UPF',
                                   'O.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Al.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Si.pbe-nl-rrkjus_psl.1.0.0.UPF']

    :rtype: dictionary

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for atomic species card block.
    atomic_species_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_SPECIES [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (?P<name>\S+) [ \t]+ (?P<mass>\S+) [ \t]+ (?P<pseudo>\S+)
            [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block. re.search returns None (it does not raise) when
    # the block is absent, so test for None explicitly instead of catching
    # AttributeError.
    match = atomic_species_block_re.search(txt)
    if match is None:
        raise ParsingError('The ATOMIC_SPECIES card block was not found in\n' +
                           txt)
    # Make sure the card block lines were extracted. If they were, store the
    # string of lines as blockstr.
    if match.group('block') is None:
        raise ParsingError(
            'The ATOMIC_SPECIES card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')
    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Now, extract the name, mass, and pseudopotential file name from each line
    # of the card block.
    names, masses, pseudo_fnms = [], [], []
    for match in atomic_species_re.finditer(blockstr):
        names.append(match.group('name'))
        masses.append(fortfloat(match.group('mass')))
        pseudo_fnms.append(match.group('pseudo'))
    info_dict = dict(names=names, masses=masses, pseudo_file_names=pseudo_fnms)
    return info_dict
def parse_basis(basis_file):
    """Get relevant information from the basis file.

    :param basis_file: absolute path to a file or open filelike object
    :return: (metadata_dict, content_str)

    - The basis file must contain one basis set in the CRYSTAL17 format
    - blank lines and lines beginning '#' will be ignored
    - the file can also start with a fenced (with ---), yaml formatted header
      section (Note keys should not contain '.'s)

    :raises ParsingError:
        if the yaml header is malformed or uses a reserved key, if the
        content holds more than one basis set, or if the first content
        line is inconsistent with the parsed number of shells

    Example ::

        # an ignored comment
        ---
        author: J Smith
        year: 1999
        ---
        8 2
        1 0 3  2.  0.
        1 1 3  6.  0.

    """
    meta_data = {}
    in_yaml = False
    yaml_lines = []
    # keys reserved for data derived from the basis content itself
    protected_keys = ["atomic_number", "num_shells", "element", "basis_type", "content"]
    parsing_data = False
    content = []

    # accept a path string, a pathlib.Path, or an open filelike object
    if isinstance(basis_file, str):
        basis_file = pathlib.Path(basis_file)
    if isinstance(basis_file, pathlib.Path):
        contentlines = basis_file.read_text().splitlines()
        basis_file_name = basis_file.name
    else:
        basis_file.seek(0)
        contentlines = basis_file.read().splitlines()
        try:
            basis_file_name = basis_file.name
        except AttributeError:
            basis_file_name = "StringIO"

    for line in contentlines:
        # ignore commented and blank lines
        if line.strip().startswith("#") or not line.strip():
            continue
        if line.strip() == "---" and not parsing_data:
            if not in_yaml:
                # opening fence of the yaml header
                in_yaml = True
                continue
            else:
                # closing fence: parse the collected header lines
                head_data = yaml.safe_load("\n".join(yaml_lines))
                head_data = {} if not head_data else head_data
                if not isinstance(head_data, dict):
                    raise ParsingError(
                        "the header data could not be read for file: {}".format(
                            basis_file_name
                        )
                    )
                if set(head_data.keys()).intersection(protected_keys):
                    raise ParsingError(
                        "the header data contained a forbidden key(s) "
                        "{} for file: {}".format(protected_keys, basis_file_name)
                    )
                meta_data = head_data
                in_yaml = False
                parsing_data = True
                continue
        if in_yaml:
            yaml_lines.append(line)
            continue
        parsing_data = True
        content.append(line.strip())

    data = parse_bsets_stdin("\n".join(content), isolated=True)
    if len(data) > 1:
        raise ParsingError(
            "the basis set string contains more than one basis set: {}".format(
                list(data.keys())
            )
        )
    atomic_symbol = list(data.keys())[0]
    meta_data["atomic_number"] = atomic_number = SYMBOLS_R[atomic_symbol]
    meta_data["element"] = atomic_symbol
    meta_data["basis_type"] = basis_type = data[atomic_symbol]["type"]
    meta_data["num_shells"] = num_shells = len(data[atomic_symbol]["bs"])
    meta_data["orbital_types"] = [o["type"] for o in data[atomic_symbol]["bs"]]

    # the input atomic number may be > 100, but we should standardise this in
    # the stored file
    first_line = content[0].strip().split()
    if len(first_line) != 2 or first_line[1] != str(num_shells):
        # report the offending first line (not the stale loop variable)
        raise ParsingError(
            "The first line should contain only the atomic id and num shells ({}): '{}' for file {}".format(
                num_shells, content[0], basis_file_name
            )
        )
    newline = "{0} {1}".format(
        atomic_number if basis_type == "all-electron" else 200 + atomic_number,
        num_shells,
    )
    content[0] = newline

    return meta_data, "\n".join(content)
def get_structuredata(self):
    """
    Return a StructureData object based on the data in the input file.

    This uses all of the data in the input file to do the necessary unit
    conversion, etc. and then creates an AiiDa StructureData object.

    All of the names corresponding of the Kind objects composing the
    StructureData object will match those found in the ATOMIC_SPECIES
    block, so the pseudopotentials can be linked to the calculation using
    the kind.name for each specific type of atom (in the event that you
    wish to use different pseudo's for two or more of the same atom).

    :return: StructureData object of the structure in the input file
    :rtype: aiida.orm.data.structure.StructureData
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # CELL_PARAMETERS are present.
    if self.cell_parameters is None:
        raise ParsingError(
            'CELL_PARAMETERS not found while parsing the input file. This '
            'card is needed for AiiDa.')

    # Figure out the factor needed to convert the lattice vectors
    # to Angstroms.
    # TODO: ***ASK GEORGE IF I SHOULD MULTIPLY OR DIVIDE BY ALAT***
    cell_units = self.cell_parameters.get('units')
    if (cell_units == 'alat') or (cell_units is None):
        # Try to determine the value of alat from the namelist.
        celldm1 = self.namelists['SYSTEM'].get('celldm(1)')
        a = self.namelists['SYSTEM'].get('a')
        # Only one of 'celldm(1)' or 'a' can be set.
        if (celldm1 is not None) and (a is not None):
            raise ParsingError(
                "Both 'celldm(1)' and 'a' were set in the input file.")
        elif celldm1 is not None:
            cell_conv_factor = celldm1 * bohr_to_ang  # celldm(1) in Bohr
        elif a is not None:
            cell_conv_factor = a  # a is in Angstroms
        else:
            if cell_units is None:
                cell_conv_factor = bohr_to_ang  # QE assumes Bohr
            else:
                raise ParsingError(
                    "Unable to determine the units of the lattice vectors."
                )
    elif cell_units == 'bohr':
        cell_conv_factor = bohr_to_ang
    elif cell_units == 'angstrom':
        cell_conv_factor = 1.0
    else:
        raise ParsingError(
            "Unable to determine the units of the lattice vectors.")

    # Get the lattice vectors and convert them to units of Angstroms.
    cell = np.array(self.cell_parameters['cell']) * cell_conv_factor

    # Get the positions and convert them to [x, y, z] Angstrom vectors.
    pos_units = self.atomic_positions['units']
    positions = np.array(self.atomic_positions['positions'])
    if pos_units in (None, 'alat'):  # QE assumes alat
        alat = np.linalg.norm(cell[0])  # Cell in Ang, so alat in Ang
        positions *= alat
    elif pos_units == 'bohr':
        positions = positions * bohr_to_ang
    elif pos_units == 'angstrom':
        pass
    elif pos_units == 'crystal':
        positions = np.dot(positions, cell)  # rotate into [x y z] basis
    else:
        raise ParsingError(
            'Unable to determine how to convert positions to [x y z] '
            'Angstrom.'
        )

    # Get the atom names corresponding to positions.
    names = self.atomic_positions['names']

    # Create a dictionary that maps an atom name to its mass.
    mass_dict = dict(
        zip(self.atomic_species['names'], self.atomic_species['masses']))

    # Use the names to figure out the atomic symbols.
    symbols = []
    for name in names:
        candidates = [
            s for s in _valid_symbols
            if name.lower().startswith(s.lower())
        ]
        if len(candidates) == 0:
            raise ParsingError(
                'Unable to figure out the element represented by the '
                'label, {}, in the input file.'.format(name))
        # Choose the longest match, since, for example, S and Si match Si.
        symbols.append(max(candidates, key=len))

    # Now that we have the names and their corresponding symbol and mass, as
    # well as the positions and cell in units of Angstroms, we create the
    # StructureData object.
    structuredata = StructureData(cell=cell)
    for name, symbol, position in zip(names, symbols, positions):
        mass = mass_dict[name]
        structuredata.append_atom(name=name,
                                  symbols=symbol,
                                  position=position,
                                  mass=mass)
    return structuredata