def __init__(self, pwinput):
    """
    Load the text of a pwinput and store it as ``self.input_txt``.

    :param pwinput: Any one of the following

        * A string of the (existing) absolute path to the pwinput file.
        * A single string containing the pwinput file's text.
        * A list of strings, with the lines of the file as the elements.
          The elements are concatenated as-is, so each one should keep its
          own line terminator.
        * A file object; its ``read()`` method is called to get the text.

    :raises IOError: if ``pwinput`` is a file object that cannot be read, or
        if it is a relative path to an existing file.
    :raises TypeError: if ``pwinput`` is a list containing any non-string
        element(s), or if it is of an unsupported type.
    :raises ParsingError: if the text obtained from ``pwinput`` is empty
        (or contains only whitespace).
    """
    # Get the text of the pwinput file as a single string.
    if isinstance(pwinput, file_types):
        # File object.
        try:
            self.input_txt = pwinput.read()
        except IOError:
            # Name the object that was actually passed in; not every
            # file-like object carries a ``name`` attribute.
            raise IOError('Unable to open the provided pwinput, {}'
                          ''.format(getattr(pwinput, 'name', repr(pwinput))))
    elif isinstance(pwinput, list):
        # List of lines.
        if not all(isinstance(s, basestring) for s in pwinput):
            raise TypeError(
                'You provided a list to parse, but some elements were not '
                'strings. Each element should be a string containing a line '
                'of the pwinput file.')
        self.input_txt = ''.join(pwinput)
    elif isinstance(pwinput, basestring):
        # Either a path to a file or the text itself.
        if os.path.isfile(pwinput):
            # An existing file is only accepted through its absolute path.
            if not os.path.isabs(pwinput):
                raise IOError(
                    'Please provide the absolute path to an existing '
                    'pwinput file.')
            with open(pwinput) as f:
                self.input_txt = f.read()
        else:
            # Not an existing file: treat the string as the input text.
            self.input_txt = pwinput
    else:
        raise TypeError("Unknown type for input 'pwinput': {}".format(
            type(pwinput)))

    # Check that pwinput is not empty.
    if not self.input_txt.strip():
        raise ParsingError('The pwinput provided was empty!')
def str01_to_bool(s):
    """
    Convert an if_pos flag given as the string '0' or '1' to a bool.

    The mapping is '0' --> True and '1' --> False. While this is opposite to
    the QE standard, this mapping is what needs to be passed to aiida in a
    'settings' ParameterData object.
    (See the _if_pos method of BasePwCpInputGenerator)

    :param s: the flag, expected to be '0' or '1'.
    :returns: True for '0', False for '1'.
    :raises ParsingError: if ``s`` is neither '0' nor '1'.
    """
    # Reject anything that is not one of the two accepted flags up front.
    if s not in ('0', '1'):
        raise ParsingError(
            'Unable to convert if_pos = "{}" to bool'.format(s))
    return s == '0'
def parse_k_points(txt):
    """
    Return a dictionary containing info from the K_POINTS card block in txt.

    .. note:: If the type of kpoints (where type = x in the card header,
       "K_POINTS x") is not present, type will be returned as 'tpiba', the
       QE default.

    :param txt: A single string containing the QE input text to be parsed.

    :returns:
        A dictionary containing

            * type: the type of kpoints (always lower-case)
            * points: for an explicit list of kpoints, an Nx3 list of the
              kpoints; for type = 'automatic', a 1x3 list of the number of
              equally-spaced points in each direction of the Brillouin
              zone, as in Monkhorst-Pack grids (not present if
              type = 'gamma')
            * weights: a 1xN list of the kpoint weights (will not be present
              if type = 'gamma' or type = 'automatic')
            * offset: a 1x3 list of the grid offsets in each direction of
              the Brillouin zone (only present if type = 'automatic')
              (**Note:** The offset value for each direction will be *one
              of* ``0.0`` [no offset] *or* ``0.5`` [offset by half a grid
              step]. This differs from the Quantum Espresso convention,
              where an offset value of ``1`` corresponds to a half-grid-step
              offset, but adheres to the current AiiDa convention.)

        Examples::

            {'type': 'crystal',
             'points': [[0.125, 0.125, 0.0],
                        [0.125, 0.375, 0.0],
                        [0.375, 0.375, 0.0]],
             'weights': [1.0, 2.0, 1.0]}

            {'type': 'automatic',
             'points': [8, 8, 8],
             'offset': [0.0, 0.5, 0.0]}

            {'type': 'gamma'}

    :raises ParsingError: if no K_POINTS card could be matched in ``txt``.
    """

    def fortfloat(s):
        """Convert a (possibly Fortran-style, e.g. '0.5d0') real to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Define re for the special-type card block.
    k_points_special_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]*
            [{(]? [ \t]* (?P<type>\S+?)? [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* \S+ [ \t]* $\n  # nks
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)

    # Define re for the info contained in the special-type block.
    k_points_special_re = re.compile(
        r"""
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)

    # Define re for the automatic-type card block and its line of info.
    k_points_automatic_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* automatic [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+)
            [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)

    # Define re for the gamma-type card block. (There is no block info.)
    k_points_gamma_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* gamma [ \t]* [)}]? [ \t]* $\n
        """, RE_FLAGS)

    # Try the three card layouts in turn: explicit list, automatic, gamma.
    special = k_points_special_block_re.search(txt)
    if special:
        kpt_type = special.group('type')
        points = []
        weights = []
        for line in k_points_special_re.finditer(special.group('block')):
            # Columns 1-3 are the coordinates, column 4 is the weight.
            points.append([fortfloat(c) for c in line.group(1, 2, 3)])
            weights.append(fortfloat(line.group(4)))
        return {
            # Fall back on the QE default when the header gives no type.
            'type': kpt_type.lower() if kpt_type is not None else 'tpiba',
            'points': points,
            'weights': weights,
        }

    automatic = k_points_automatic_block_re.search(txt)
    if automatic:
        return {
            'type': 'automatic',
            'points': [int(n) for n in automatic.group(1, 2, 3)],
            # Any non-zero QE offset flag is reported as a half-step (0.5).
            'offset': [
                0. if int(flag) == 0 else 0.5
                for flag in automatic.group(4, 5, 6)
            ],
        }

    if k_points_gamma_block_re.search(txt):
        return {'type': 'gamma'}

    raise ParsingError('K_POINTS card not found in\n' + txt)
def parse_atomic_species(txt):
    """
    Return a dictionary containing info from the ATOMIC_SPECIES card block
    in txt.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * names: list of the atom names (e.g. 'Si', 'Si0', 'Si_0') (case
              as-is)
            * masses: list of the masses of the atoms in 'names'
            * pseudo_file_names: list of the pseudopotential file names for
              the atoms in 'names' (case as-is)

        Example::

            {'names': ['Li', 'O', 'Al', 'Si'],
             'masses': [6.941, 15.9994, 26.98154, 28.0855],
             'pseudo_file_names': ['Li.pbe-sl-rrkjus_psl.1.0.0.UPF',
                                   'O.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Al.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Si.pbe-nl-rrkjus_psl.1.0.0.UPF']}

    :raises ParsingError: if the ATOMIC_SPECIES card block is not found in
        ``txt``.
    """

    def fortfloat(s):
        """Convert a (possibly Fortran-style, e.g. '1.0d0') real to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Define re for atomic species card block.
    atomic_species_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_SPECIES [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)

    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (?P<name>\S+) [ \t]+ (?P<mass>\S+) [ \t]+ (?P<pseudo>\S+)
            [ \t]* $\n?
        """, RE_FLAGS)

    # Find the card block. ``search`` returns None (it does not raise) when
    # the card is absent, so that case has to be tested for explicitly.
    match = atomic_species_block_re.search(txt)
    if match is None:
        raise ParsingError('The ATOMIC_SPECIES card block was not found in\n'
                           + txt)

    # Make sure the card block lines were extracted.
    blockstr = match.group('block')
    if blockstr is None:
        raise ParsingError(
            'The ATOMIC_SPECIES card block was parsed as empty in\n' + txt)

    # Extract the name, mass, and pseudopotential file name from each line
    # of the card block.
    names, masses, pseudo_fnms = [], [], []
    for line in atomic_species_re.finditer(blockstr):
        names.append(line.group('name'))
        masses.append(fortfloat(line.group('mass')))
        pseudo_fnms.append(line.group('pseudo'))

    return dict(names=names, masses=masses, pseudo_file_names=pseudo_fnms)
def parse_cell_parameters(txt):
    """
    Return dict containing info from the CELL_PARAMETERS card block in txt.

    .. note:: This card is only needed if ibrav = 0. Therefore, if the card
       is not present, the function will return None and not raise an error.

    .. note:: If the units are unspecified, they will be returned as None.
       The units interpreted by QE depend on whether or not one of
       'celldm(1)' or 'a' is set in &SYSTEM.

    :param txt: A single string containing the QE input text to be parsed.

    :returns:
        A dictionary (if CELL_PARAMETERS is present; else: None) with

            * units: the units of the lattice vectors (always lower-case) or
              None
            * cell: list with the lattice vectors found in the block as rows

        For example::

            {'units': 'angstrom',
             'cell': [[16.9, 0.0, 0.0],
                      [-2.6, 8.0, 0.0],
                      [-2.6, -3.5, 7.2]]}

    :raises ParsingError: if the matched card block is empty.
    """

    def fortfloat(s):
        """Convert a (possibly Fortran-style, e.g. '1.0d0') real to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Define re for the card block.
    cell_parameters_block_re = re.compile(
        r"""
        ^ [ \t]*
        CELL_PARAMETERS [ \t]*
        [{(]? \s* (?P<units>[a-z]*) \s* [)}]? \s* [\n]
        (?P<block>
        (
            (
                \s*             # White space in front of the element spec is ok
                (
                    # First number
                    (
                        [-|+]?     # Plus or minus in front of the number (optional)
                        (\d*       # optional decimal in the beginning .0001 is ok, for example
                        [\.]       # There has to be a dot followed by
                        \d+)       # at least one decimal
                        |          # OR
                        (\d+       # at least one decimal, followed by
                        [\.]?      # an optional dot
                        \d*)       # followed by optional decimals
                        ([E|e|d|D][+|-]?\d+)?  # optional exponents E+03, e-05, d0, D0
                        (
                            \s+        # White space between numbers
                            [-|+]?     # Plus or minus in front of the number (optional)
                            (\d*       # optional decimal in the beginning .0001 is ok, for example
                            [\.]       # There has to be a dot followed by
                            \d+)       # at least one decimal
                            |          # OR
                            (\d+       # at least one decimal, followed by
                            [\.]?      # an optional dot
                            \d*)       # followed by optional decimals
                            ([E|e|d|D][+|-]?\d+)?  # optional exponents E+03, e-05, d0, D0
                        ){2}       # I expect three float values
                    )
                    |
                    \#
                    |
                    !            # If a line is commented out, that is also ok
                )
                .*               # I do not care what is after the comment or the vector
                |                # OR
                \s*              # A line only containing white space
             )
            [\n]                 # line break at the end
        ){3}                     # I need exactly 3 vectors
        )
        """, RE_FLAGS)

    # Define re for a single lattice vector (one line of the block).
    cell_vector_regex = re.compile(
        r"""
        ^                        # Linestart
        [ \t]*                   # Optional white space
        (?P<x>                   # Get x
            [\-|\+]? ( \d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<y>                   # Get y
            [\-|\+]? (\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<z>                   # Get z
            [\-|\+]? (\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        """, re.X | re.M)

    # Find the card block; the card is optional, so no match is not an error.
    match = cell_parameters_block_re.search(txt)
    if not match:
        return None

    # Use specified units or None if not specified. The 'units' group always
    # takes part in a match and is the empty string when the header gives no
    # units, so an empty result is mapped to None explicitly.
    units = match.group('units').lower() or None

    # Get the string containing the lines of the block.
    blockstr = match.group('block')
    if blockstr is None:
        raise ParsingError(
            'The CELL_PARAMETER card block was parsed as empty in\n' + txt)

    # Now, extract the lattice vectors. Lines of the block that are not a
    # vector (comments, blank lines) simply do not match and are skipped.
    lattice_vectors = [
        [fortfloat(component) for component in vector.group('x', 'y', 'z')]
        for vector in cell_vector_regex.finditer(blockstr)
    ]

    return dict(units=units, cell=lattice_vectors)
def parse_atomic_positions(txt):
    """
    Return a dictionary containing info from the ATOMIC_POSITIONS card block
    in txt.

    .. note:: If the units are unspecified, they will be returned as None.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * units: the units of the positions (always lower-case) or None
            * names: list of the atom names (e.g. ``'Si'``, ``'Si0'``)
            * positions: list of the [x, y, z] positions
            * fixed_coords: list of [x, y, z] (bools) of the force
              modifications (**Note:** True <--> Fixed, as defined in the
              ``BasePwCpInputGenerator._if_pos`` method)

        For example::

            {'units': 'bohr',
             'names': ['C', 'O'],
             'positions': [[0.0, 0.0, 0.0],
                           [0.0, 0.0, 2.5]],
             'fixed_coords': [[False, False, False],
                              [True, True, True]]}

    :raises ParsingError: if the ATOMIC_POSITIONS card block is not found in
        ``txt``, or if an if_pos flag cannot be converted to a bool.
    """

    def str01_to_bool(s):
        """
        Map strings '0', '1' to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs
        to be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        if s == '0':
            return True
        elif s == '1':
            return False
        else:
            raise ParsingError(
                'Unable to convert if_pos = "{}" to bool'.format(s))

    def fortfloat(s):
        """Convert a (possibly Fortran-style, e.g. '1.0d0') real to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Define re for the card block.
    # NOTE: This will match card block lines w/ or w/out force modifications.
    atomic_positions_block_re = re.compile(
        r"""
        ^ \s* ATOMIC_POSITIONS \s*                      # Atomic positions start with that string
        [{(]? \s* (?P<units>\S+?)? \s* [)}]? \s* $\n    # The units are after the string in optional brackets
        (?P<block>                                      # This is the block of positions
            (
                (
                    \s*                                 # White space in front of the element spec is ok
                    (
                        [A-Za-z]+[A-Za-z0-9]{0,2}       # Element spec
                        (
                            \s+                         # White space in front of the number
                            [-|+]?                      # Plus or minus in front of the number (optional)
                            (
                                (
                                    \d*                 # optional decimal in the beginning .0001 is ok, for example
                                    [\.]                # There has to be a dot followed by
                                    \d+                 # at least one decimal
                                )
                                |                       # OR
                                (
                                    \d+                 # at least one decimal, followed by
                                    [\.]?               # an optional dot ( both 1 and 1. are fine)
                                    \d*                 # And optional number of decimals (1.00001)
                                )                       # followed by optional decimals
                            )
                            ([E|e|d|D][+|-]?\d+)?       # optional exponents E+03, e-05
                        ){3}                            # I expect three float values
                        ((\s+[0-1]){3}\s*)?             # Followed by optional ifpos
                        \s*                             # Followed by optional white space
                        |
                        \#.*                            # If a line is commented out, that is also ok
                        |
                        \!.*                            # Comments also with excl. mark in fortran
                    )
                    |                                   # OR
                    \s*                                 # A line only containing white space
                 )
                [\n]                                    # line break at the end
            )+                                          # A positions block should be one or more lines
        )
        """, re.X | re.M)

    # Define re for one atomic position line, with or without the three
    # trailing force-modification (if_pos) flags.
    atomic_positions_w_constraints_re = re.compile(
        r"""
        ^                                       # Linestart
        [ \t]*                                  # Optional white space
        (?P<name>[A-Za-z]+[A-Za-z0-9]{0,2})\s+  # get the symbol, max 3 chars, starting with a char
        (?P<x>                                  # Get x
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<y>                                  # Get y
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<z>                                  # Get z
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]*
        (?P<fx>[01]?)                           # Get fx
        [ \t]*
        (?P<fy>[01]?)                           # Get fy
        [ \t]*
        (?P<fz>[01]?)                           # Get fz
        """, re.X | re.M)

    # Find the card block and extract units and the lines of the block.
    match = atomic_positions_block_re.search(txt)
    if not match:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was not found in\n' + txt)

    # Get the units. If they are not found, match.group('units') will be None.
    units = match.group('units')
    if units is not None:
        units = units.lower()

    # Get the string containing the lines of the block.
    blockstr = match.group('block')
    if blockstr is None:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was parsed as empty in\n' + txt)

    # Parse the lines of the card block in file order, extracting an atom
    # name, position and fixed coordinates from each. Lines that are not a
    # position (comments, blank lines) do not match and are skipped, so no
    # check against the number of lines in the block is made.
    names, positions, fixed_coords = [], [], []
    for line in atomic_positions_w_constraints_re.finditer(blockstr):
        names.append(line.group('name'))
        positions.append(
            [fortfloat(coord) for coord in line.group('x', 'y', 'z')])
        # A missing flag is captured as '' and defaults to '1', which maps
        # to False <--> not fixed (the default).
        flags = [flag or '1' for flag in line.group('fx', 'fy', 'fz')]
        fixed_coords.append([str01_to_bool(flag) for flag in flags])

    return dict(units=units,
                names=names,
                positions=positions,
                fixed_coords=fixed_coords)
def parse_namelists(txt):
    """
    Extract the namelists of a QE input text as a nested dictionary.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A nested dictionary of the namelists and their key-value pairs. The
        namelists will always be upper-case keys, while the parameter keys
        will always be lower-case. Namelists in which no key-value pair is
        found are left out.

        For example::

            {"CONTROL": {"calculation": "bands",
                         "prefix": "al",
                         "pseudo_dir": "./pseudo",
                         "outdir": "./out"},
             "ELECTRONS": {"diagonalization": "cg"},
             "SYSTEM": {"nbnd": 8,
                        "ecutwfc": 15.0,
                        "celldm(1)": 7.5,
                        "ibrav": 2,
                        "nat": 1,
                        "ntyp": 1}
            }

    :raises ParsingError: if no namelist data at all is found in ``txt``.
    """
    # TODO: Incorporate support for algebraic expressions?
    # Pattern for a whole namelist: captures its name and its body.
    namelist_re = re.compile(
        r"""
        ^ [ \t]* &(\S+) [ \t]* $\n  # match line w/ nmlst tag; save nmlst name
        (
         [\S\s]*?                # match any line non-greedily
        )                           # save the group of text between nmlst
        ^ [ \t]* / [ \t]* $\n       # match line w/ "/" as only non-whitespace char
        """, re.M | re.X)

    # Pattern for the individual key = val pairs inside a namelist body.
    key_value_re = re.compile(
        r"""
        [ \t]* (\S+?) [ \t]*  # match and store key
        =               # equals sign separates key and value
        [ \t]* (\S+?) [ \t]*  # match and store value
        [\n,]           # return or comma separates "key = value" pairs
        """, re.M | re.X)

    # Walk over the namelists, converting each body to a dict of its pairs.
    params_dict = {}
    for name, body in namelist_re.findall(txt):
        pairs = {
            key.lower(): str2val(raw_value)
            for key, raw_value in key_value_re.findall(body)
        }
        # Only namelists that actually hold something are recorded.
        if pairs:
            params_dict[name.upper()] = pairs

    if not params_dict:
        raise ParsingError(
            'No data was found while parsing the namelist in the following '
            'text\n' + txt)

    # TODO: uppercase correct
    return params_dict
def __init__(self, pwinput):
    """
    Load the text of a pwinput, normalise its newlines and store it as
    ``self.input_txt``.

    :param pwinput: Any one of the following

        * A string of the (existing) absolute path to the pwinput file.
        * A single string containing the pwinput file's text.
        * A list of strings, with the lines of the file as the elements.
          The elements are concatenated as-is, so each one should keep its
          own line terminator.
        * A file object; its ``read()`` method is called to get the text.

    :raises IOError: if ``pwinput`` is a file object that cannot be read, or
        if it is a relative path to an existing file.
    :raises TypeError: if ``pwinput`` is a list containing any non-string
        element(s), or if it is of an unsupported type.
    :raises ParsingError: if the text obtained from ``pwinput`` is empty
        (or contains only whitespace).
    """
    # Get the text of the pwinput file as a single string.
    if isinstance(pwinput, file_types):
        # File object.
        try:
            self.input_txt = pwinput.read()
        except IOError:
            # Name the object that was actually passed in; not every
            # file-like object carries a ``name`` attribute.
            raise IOError('Unable to open the provided pwinput, {}'
                          ''.format(getattr(pwinput, 'name', repr(pwinput))))
    elif isinstance(pwinput, list):
        # List of lines.
        if not all(isinstance(s, basestring) for s in pwinput):
            raise TypeError(
                'You provided a list to parse, but some elements were not '
                'strings. Each element should be a string containing a line '
                'of the pwinput file.')
        self.input_txt = ''.join(pwinput)
    elif isinstance(pwinput, basestring):
        # Either a path to a file or the text itself.
        if os.path.isfile(pwinput):
            # An existing file is only accepted through its absolute path.
            if not os.path.isabs(pwinput):
                raise IOError(
                    'Please provide the absolute path to an existing '
                    'pwinput file.')
            with open(pwinput) as f:
                self.input_txt = f.read()
        else:
            # Not an existing file: treat the string as the input text.
            self.input_txt = pwinput
    else:
        raise TypeError("Unknown type for input 'pwinput': {}".format(
            type(pwinput)))

    # Check that pwinput is not empty.
    if not self.input_txt.strip():
        raise ParsingError('The pwinput provided was empty!')

    # Take care explicitly of Windows newlines: \r\n
    # (open would do it automatically, but if the user passes a string
    # this would not be done properly)
    self.input_txt = self.input_txt.replace('\r\n', '\n')
    # This is instead for Mac <=9 (hopefully nobody still uses it, but
    # who knows) that just used \r
    self.input_txt = self.input_txt.replace('\r', '\n')
    # Add a newline, as a partial fix to #15
    self.input_txt += "\n"