def parse_ion(stream):
    """
    Try to get relevant information from the .ion. For the moment, only the
    element symbol, name, mass and atomic number.

    Raise a ParsingError exception if the file does not contain the element
    symbol, the atomic number or the label. The presence in the file of the
    mass is, instead, considered optional: if not present, None is returned.

    :param stream: open filelike object containing the .ion XML content.
    :return: dict with keys 'element', 'name', 'atomic_number' and 'mass'.
    :raises aiida.common.exceptions.ParsingError: if mandatory tags are missing
        or the element symbol is unknown.
    """
    from xml.etree.ElementTree import ElementTree
    from aiida.common.exceptions import ParsingError
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    el_tr = ElementTree(None, stream)
    root = el_tr.getroot()

    # 'symbol', 'z' and 'label' are mandatory tags
    if root.find('symbol') is None or root.find('z') is None or root.find(
            'label') is None:
        raise ParsingError(
            f"Corrupted ion file {stream.name}: element symbol or atomic number missing"
        )

    parsed_data["element"] = str(root.find('symbol').text.strip())
    if parsed_data["element"] not in _valid_symbols:
        raise ParsingError(
            f"Unknown element symbol {parsed_data['element']} in file {stream.name}"
        )

    parsed_data["name"] = str(root.find('label').text.strip())
    parsed_data["atomic_number"] = int(root.find('z').text)

    # The mass is optional (see docstring): return None instead of crashing
    # with an AttributeError when the <mass> tag is absent.
    mass_node = root.find('mass')
    parsed_data["mass"] = float(mass_node.text) if mass_node is not None else None

    return parsed_data
Example #2
0
    def parse_with_retrieved(self, retreived):
        """Parses the datafolder, stores results.

        This parser for this code ...
        """
        # NOTE(review): the parameter is spelled `retreived` (sic) and is not
        # used in the visible body — the retrieved folder is re-fetched from
        # self._calc below. Renaming would break keyword callers; confirm
        # before changing.
        from aiida.common.exceptions import InvalidOperation
        from aiida.common import aiidalogger

        # suppose at the start that the job is unsuccessful, unless proven otherwise
        successful = False

        # check whether the yambo calc was an initialisation (p2y)
        try:
            settings_dict = self._calc.inp.settings.get_dict()
            settings_dict = _uppercase_dict(settings_dict,
                                            dict_name='settings')
        except AttributeError:
            # no settings node was attached to the calculation
            settings_dict = {}

        initialise = settings_dict.pop('INITIALISE', None)

        # select the folder object
        out_folder = self._calc.get_retrieved_node()

        # check what is inside the folder
        list_of_files = out_folder.get_folder_list()

        # input parameters are mandatory only for a real yambo run, not for a
        # p2y initialisation
        try:
            input_params = self._calc.inp.parameters.get_dict()
        except AttributeError:
            if not initialise:
                raise ParsingError("Input parameters not found!")
            else:
                input_params = {}
        # retrieve the cell: if parent_calc is a YamboCalculation we must find the original PwCalculation
        # going back through the graph tree.
        parent_calc = self._calc.inp.parent_calc_folder.inp.remote_folder
        cell = {}
        if isinstance(parent_calc, YamboCalculation):
            has_found_cell = False
            # walk up the provenance chain until a calculation that has a
            # structure input (the original PwCalculation) is found
            while (not has_found_cell):
                try:
                    cell = parent_calc.inp.structure.cell
                    has_found_cell = True
                except AttributeError:
                    parent_calc = parent_calc.inp.parent_calc_folder.inp.remote_folder
        elif isinstance(parent_calc, PwCalculation):
            cell = self._calc.inp.parent_calc_folder.inp.remote_folder.inp.structure.cell

        # accumulators for the parsing results
        output_params = {'warnings': [], 'errors': [], 'yambo_wrote': False}
        new_nodes_list = []
        ndbqp = {}
        ndbhf = {}
        try:
            results = YamboFolder(out_folder.get_abs_path())
        except Exception, e:
            # NOTE(review): Python 2 `except ..., e` syntax — this module
            # predates Python 3. Also `success` is assigned here while the
            # flag used above is `successful`; likely a latent typo — confirm
            # before changing.
            success = False
            raise ParsingError("Unexpected behavior of YamboFolder: %s" % e)
Example #3
0
def _parse_first_line(line, fname):
    """Parse the first line of a basis-set file.

    The first line must hold exactly two integer fields: the (conventional)
    atomic number and the number of shells.

    :param line: the line string
    :param fname: the filename string
    :return: (atomic_number, basis_type, num_shells, newline)
    """
    from aiida.common.exceptions import ParsingError

    fields = line.strip().split()
    if len(fields) != 2:
        raise ParsingError(
            "The first line should contain only two fields: '{}' for file {}".
            format(line, fname))

    number_field, shells_field = fields

    if not number_field.isdigit():
        raise ParsingError(
            "The first field should be the atomic number '{}' for file {}".
            format(line, fname))
    raw_number = int(number_field)

    # Decode the conventional atomic-number ranges; anything outside the
    # handled ranges is rejected below.
    if raw_number < 99:
        atomic_number, basis_type = raw_number, "all-electron"
    elif 200 < raw_number < 999:
        raise NotImplementedError(
            "valence electron basis sets not currently supported")
    elif raw_number > 1000:
        atomic_number, basis_type = raw_number % 100, "all-electron"
    else:
        raise ParsingError("Illegal atomic number {} for file {}".format(
            raw_number, fname))

    if not shells_field.isdigit():
        raise ParsingError(
            "The second field should be the number of shells {} for file {}".
            format(line, fname))
    num_shells = int(shells_field)

    # we would deal with different numbering at .d12 creation time
    newline = "{0} {1}\n".format(
        atomic_number if basis_type == "all-electron" else 200 + atomic_number,
        num_shells)

    return atomic_number, basis_type, num_shells, newline
Example #4
0
    def get_cutoffs_from_djrepo(cls, djrepo, pseudo_type):
        """Collect and organize the suggested cutoffs (hints) from a DJREPO file.

        DJREPO files only provide a kinetic energy cutoff, so the charge density (rho) cutoff is derived by
        multiplying with a dual that depends on the pseudo type: 4.0 for the norm-conserving types
        (``UpfData``, ``Psp8Data``, ``PsmlData``) and 2.0 for PAW (``JthXmlData``).

        .. note:: NOTE(review): no unit conversion is performed here — the values are returned in the units
            used by the DJREPO file. A previous docstring claimed a conversion from Hartree to eV; confirm
            with callers whether that is expected.

        :param djrepo: dictionary loaded from DJREPO JSON file
        :param pseudo_type: the pseudo data class, used to select the appropriate dual
        :returns: cutoffs dictionary where keys are stringency levels and values are
            {'cutoff_wfc': ..., 'cutoff_rho': ...}
        :raises ValueError: if no dual is known for ``pseudo_type``
        :raises ParsingError: if the djrepo does not define the expected hints
        """
        dual_mapping = {
            UpfData: 4.0,
            Psp8Data: 4.0,
            PsmlData: 4.0,
            JthXmlData: 2.0,
        }

        try:
            dual = dual_mapping[pseudo_type]
        except KeyError as exception:
            raise ValueError(
                f'cannot get cutoffs for pseudo type `{pseudo_type}` because the appropriate dual '
                'for generating density cutoffs is unknown') from exception

        try:
            hints = djrepo['hints']
        except KeyError as exception:
            raise ParsingError(
                'key `hints` is not defined in the djrepo.') from exception

        cutoffs = {}

        for stringency in ['low', 'normal', 'high']:

            try:
                # A missing stringency and a missing `ecut` key both surface
                # as KeyError and are reported identically, as before.
                ecutwfc = hints[stringency]['ecut']
            except KeyError as exception:
                raise ParsingError(
                    f'stringency `{stringency}` is not defined in the djrepo `hints`'
                ) from exception

            cutoffs[stringency] = {
                'cutoff_wfc': ecutwfc,
                'cutoff_rho': ecutwfc * dual,
            }

        return cutoffs
Example #5
0
    def set_file(self, file):
        """Store the file in the repository and parse it to set the `element` and `md5` attributes.

        :param file: filepath or filelike object of the UPF potential file to store.
        """
        # pylint: disable=redefined-builtin
        from aiida.common.exceptions import ParsingError
        from aiida.common.files import md5_file, md5_from_filelike

        parsed_data = parse_upf(file)

        # `md5_file` accepts only paths; filelike objects raise TypeError,
        # in which case we fall back to the filelike variant.
        try:
            md5sum = md5_file(file)
        except TypeError:
            md5sum = md5_from_filelike(file)

        if 'element' not in parsed_data:
            raise ParsingError(
                "No 'element' parsed in the UPF file {}; unable to store".
                format(self.filename))
        element = parsed_data['element']

        super(UpfData, self).set_file(file)

        self.set_attribute('element', str(element))
        self.set_attribute('md5', md5sum)
    def store(self, *args, **kwargs):
        """Store the node, reparsing the file so that the md5 and the element are correctly reset.

        :raises aiida.common.exceptions.ParsingError: if no element could be parsed from the file.
        """
        from aiida.common.exceptions import ParsingError
        from aiida.common.files import md5_from_filelike

        if self.is_stored:
            return self

        # `parse_psml` needs a filename, so reparse through the handle's path
        with self.open(mode='r') as handle:
            parsed_data = parse_psml(handle.name)

        # Open in binary mode which is required for generating the md5 checksum
        with self.open(mode='rb') as handle:
            md5sum = md5_from_filelike(handle)

        try:
            element = parsed_data['element']
        except KeyError as exception:
            raise ParsingError("No 'element' parsed in the PSML file {};"
                               " unable to store".format(self.filename)) from exception

        self.set_attribute('element', str(element))
        self.set_attribute('md5', md5sum)

        return super(PsmlData, self).store(*args, **kwargs)
Example #7
0
    def set_file(self, file, filename=None):
        """Store the file in the repository and parse it to set the `element` and `md5` attributes.

        :param file: filepath or filelike object of the UPF potential file to store.
            Hint: Pass io.BytesIO(b"my string") to construct the file directly from a string.
        :param filename: specify filename to use (defaults to name of provided file).
        """
        # pylint: disable=redefined-builtin
        from aiida.common.exceptions import ParsingError
        from aiida.common.files import md5_file, md5_from_filelike

        parsed_data = parse_upf(file)

        # `md5_file` only works on paths; filelike objects raise TypeError
        try:
            md5sum = md5_file(file)
        except TypeError:
            md5sum = md5_from_filelike(file)

        if 'element' not in parsed_data:
            raise ParsingError(
                "No 'element' parsed in the UPF file {}; unable to store".
                format(self.filename))

        super(UpfData, self).set_file(file, filename=filename)

        self.set_attribute('element', str(parsed_data['element']))
        self.set_attribute('md5', md5sum)
    def store(self, **kwargs):  #pylint: disable=arguments-differ
        """
        Store the node, reparsing the file so that the md5 and the element
        are correctly reset.
        """
        from aiida.common.exceptions import ParsingError
        from aiida.common.files import md5_from_filelike

        if self.is_stored:
            return self

        # reparse the PSF to recover the element
        with self.open(mode='r') as handle:
            parsed_data = parse_psf(handle)

        # binary mode is required for generating the md5 checksum
        with self.open(mode='rb') as handle:
            md5sum = md5_from_filelike(handle)

        if 'element' not in parsed_data:
            raise ParsingError("No 'element' parsed in the PSF file {};" " unable to store".format(self.filename))

        self.set_attribute('element', str(parsed_data['element']))
        self.set_attribute('md5', md5sum)

        return super().store(**kwargs)
Example #9
0
    def store(self, *args, **kwargs):
        """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
        from aiida.common.exceptions import ParsingError, ValidationError
        import aiida.common.utils

        abspath = self.get_file_abs_path()
        if not abspath:
            raise ValidationError("No valid PSF was passed!")

        parsed_data = parse_psf(abspath)
        md5sum = aiida.common.utils.md5_file(abspath)

        if 'element' not in parsed_data:
            raise ParsingError("No 'element' parsed in the PSF file {};"
                               " unable to store".format(self.filename))

        self._set_attr('element', str(parsed_data['element']))
        self._set_attr('md5', md5sum)

        return super(PsfData, self).store(*args, **kwargs)
Example #10
0
    def get_md5_from_djrepo(cls, djrepo, pseudo_type):
        """Get the appropriate md5 hash from a DJREPO file.

        :param djrepo: dictionary loaded from DJREPO JSON file.
        :param pseudo_type: the pseudo data class, used to select which md5 key to read.
        :returns: md5 string.
        :raises ValueError: if ``pseudo_type`` is not supported.
        :raises ParsingError: if the expected md5 key is missing from the djrepo.
        """
        md5_key_mapping = {
            UpfData: 'md5_upf',
            Psp8Data: 'md5',
            PsmlData: 'md5_psml',
            JthXmlData: 'md5',
        }

        try:
            md5_key = md5_key_mapping[pseudo_type]
        except KeyError as exception:
            raise ValueError(
                f'pseudo type `{pseudo_type}` is unsupported by PseudoDojo djrepos: {exception}'
            ) from exception

        try:
            md5 = djrepo[md5_key]
        except KeyError as exception:
            # BUGFIX: was `cls.md5_key`, an attribute that does not exist and
            # would raise AttributeError inside this handler; the local
            # `md5_key` is what was intended.
            raise ParsingError(
                f'key `{md5_key}` is not defined in the djrepo: {exception}'
            ) from exception

        return md5
Example #11
0
    def store(self, *args, **kwargs):
        """Store the node, reparsing the file so that the md5 and the element are correctly reset."""
        # pylint: disable=arguments-differ
        from aiida.common.exceptions import ParsingError
        from aiida.common.files import md5_from_filelike

        if self.is_stored:
            return self

        # reparse the UPF to recover the element
        with self.open(mode='r') as handle:
            parsed_data = parse_upf(handle)

        # binary mode is required for generating the md5 checksum
        with self.open(mode='rb') as handle:
            md5 = md5_from_filelike(handle)

        if 'element' not in parsed_data:
            raise ParsingError(
                'Could not parse the element from the UPF file {}'.format(
                    self.filename))

        self.set_attribute('element', str(parsed_data['element']))
        self.set_attribute('md5', md5)

        return super(UpfData, self).store(*args, **kwargs)
def parse_psml(fname, check_filename=True):
    """
    Try to get relevant information from the PSML. For the moment, only the
    element name.
    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os

    from aiida.common.exceptions import ParsingError
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.nodes.data.structure import _valid_symbols
    from xml.dom import minidom

    dom = minidom.parse(fname)

    # the <pseudo-atom-spec> node carries the element information
    spec = dom.getElementsByTagName('pseudo-atom-spec')[0]
    element = spec.attributes['atomic-label'].value
    atomic_number = spec.attributes['atomic-number'].value
    z_pseudo = spec.attributes['z-pseudo'].value

    if element is None:
        raise ParsingError(
            "Unable to find the element of PSML {}".format(fname))
    # Only first letter capitalized!
    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(
            element, fname))

    if check_filename and not os.path.basename(fname).lower().startswith(
            element.lower()):
        raise ParsingError("Filename {0} was recognized for element "
                           "{1}, but the filename does not start "
                           "with {1}".format(fname, element))

    return {
        'element': element,
        'atomic-number': atomic_number,
        'z-pseudo': z_pseudo,
    }
Example #13
0
def parse_psf(fname, check_filename=True):
    """
    Try to get relevant information from the PSF. For the moment, only the
    element name.
    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.orm.nodes.data.structure import _valid_symbols

    # accept either a filelike object or a path
    try:
        tokens = fname.read().split()
        fname = fname.name
    except AttributeError:
        with io.open(fname, encoding='utf8') as handle:
            tokens = handle.read().split()

    # the element symbol is the first whitespace-separated token, if any
    element = tokens[0] if tokens else None

    if element is None:
        raise ParsingError("Unable to find the element of PSF {}".format(fname))
    # Only first letter capitalized!
    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(element, fname))

    if check_filename and not os.path.basename(fname).lower().startswith(element.lower()):
        raise ParsingError(
            "Filename {0} was recognized for element "
            "{1}, but the filename does not start "
            "with {1}".format(fname, element)
        )

    return {'element': element}
Example #14
0
def parse_psf(fname, check_filename=True):
    """
    Try to get relevant information from the PSF. For the moment, only the
    element name.
    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os

    from aiida.common.exceptions import ParsingError
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.data.structure import _valid_symbols

    with open(fname) as handle:
        tokens = handle.read().split()

        # the element symbol is the first whitespace-separated token, if any
        element = tokens[0] if tokens else None

        if element is None:
            raise ParsingError(
                "Unable to find the element of PSF {}".format(fname))
        # Only first letter capitalized!
        element = element.capitalize()
        if element not in _valid_symbols:
            raise ParsingError("Unknown element symbol {} for file {}".format(
                element, fname))

        if check_filename and not os.path.basename(fname).lower().startswith(
                element.lower()):
            raise ParsingError("Filename {0} was recognized for element "
                               "{1}, but the filename does not start "
                               "with {1}".format(fname, element))

    return {'element': element}
Example #15
0
    def __init__(self, pwinput):
        """
        Parse inputs's namelist and cards to create attributes of the info.

        :param pwinput:
            Any one of the following

                * A string of the (existing) absolute path to the pwinput file.
                * A single string containing the pwinput file's text.
                * A list of strings, with the lines of the file as the elements.
                * A file object. (It will be opened, if it isn't already.)

        :raises IOError: if ``pwinput`` is a file and there is a problem reading
            the file.
        :raises TypeError: if ``pwinput`` is a list containing any non-string
            element(s), or is of an unsupported type.
        :raises aiida.common.exceptions.ParsingError: if there are issues
            parsing the pwinput.
        """
        # Get the text of the pwinput file as a single string.
        # File object.
        if isinstance(pwinput, file):
            try:
                self.input_txt = pwinput.read()
            except IOError:
                # BUGFIX: the message previously used `file.name` (the builtin
                # type), which has no `name` attribute — use the instance.
                raise IOError('Unable to open the provided pwinput, {}'
                              ''.format(pwinput.name))
        # List of lines.
        elif isinstance(pwinput, list):
            if all((issubclass(type(s), basestring) for s in pwinput)):
                self.input_txt = ''.join(pwinput)
            else:
                raise TypeError(
                    'You provided a list to parse, but some elements were not '
                    'strings. Each element should be a string containing a line'
                    'of the pwinput file.')
        # Path or string of the text.
        elif issubclass(type(pwinput), basestring):
            if os.path.isfile(pwinput):
                if os.path.exists(pwinput) and os.path.isabs(pwinput):
                    self.input_txt = open(pwinput).read()
                else:
                    raise IOError(
                        'Please provide the absolute path to an existing '
                        'pwinput file.')
            else:
                self.input_txt = pwinput
        else:
            # Previously an unsupported type fell through silently and failed
            # later with an AttributeError; fail fast with a clear message.
            raise TypeError(
                'Unsupported pwinput type: {}'.format(type(pwinput)))

        # Check that pwinput is not empty.
        if len(self.input_txt.strip()) == 0:
            raise ParsingError('The pwinput provided was empty!')
Example #16
0
    def str01_to_bool(s):
        """
        Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs to
        be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        # guard-clause style: handle each legal value, then fail loudly
        if s == '0':
            return True
        if s == '1':
            return False
        raise ParsingError(
            'Unable to convert if_pos = {} to bool'.format(s))
    def set_file(self, file, filename=None):
        """
        I pre-parse the file to store the attributes.
        """
        from aiida.common.exceptions import ParsingError

        parsed_data = parse_psml(file)
        md5sum = md5_file(file)

        if 'element' not in parsed_data:
            raise ParsingError(
                "No 'element' parsed in the PSML file: unable to store")

        super(PsmlData, self).set_file(file)

        self.set_attribute('element', str(parsed_data['element']))
        self.set_attribute('md5', md5sum)
Example #18
0
    def set_file(self, filename):
        """
        I pre-parse the file to store the attributes.
        """
        from aiida.common.exceptions import ParsingError
        import aiida.common.utils

        parsed_data = parse_psf(filename)
        md5sum = aiida.common.utils.md5_file(filename)

        if 'element' not in parsed_data:
            raise ParsingError("No 'element' parsed in the PSF file {};"
                               " unable to store".format(self.filename))

        super(PsfData, self).set_file(filename)

        self._set_attr('element', str(parsed_data['element']))
        self._set_attr('md5', md5sum)
    def set_file(self, filename):
        """Pre-parse the PSML file to store the `element` and `md5` attributes.

        :param filename: path of the PSML file to store.
        :raises aiida.common.exceptions.ParsingError: if no element could be parsed.
        """
        from aiida.common.exceptions import ParsingError

        parsed_data = parse_psml(filename)
        md5sum = md5_file(filename)

        try:
            element = parsed_data['element']
        except KeyError as exception:
            raise ParsingError("No 'element' parsed in the PSML file {};"
                               " unable to store".format(self.filename)) from exception

        super(PsmlData, self).set_file(filename)

        self.set_attribute('element', str(element))
        self.set_attribute('md5', md5sum)
Example #20
0
def parse_namelists(txt):
    """
    Parse txt to extract a dictionary of the namelist info.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A nested dictionary of the namelists and their key-value pairs. The
        namelists will always be upper-case keys, while the parameter keys will
        always be lower-case.

        For example: ::

            {"CONTROL": {"calculation": "bands",
                         "prefix": "al",
                         "pseudo_dir": "./pseudo",
                         "outdir": "./out"},
             "ELECTRONS": {"diagonalization": "cg"},
             "SYSTEM": {"nbnd": 8,
                        "ecutwfc": 15.0,
                        "celldm(1)": 7.5,
                        "ibrav": 2,
                        "nat": 1,
                        "ntyp": 1}
            }

    :rtype: dictionary
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # TODO: Incorporate support for algebraic expressions?
    # Regex matching one whole namelist block, capturing its name and body.
    namelist_re = re.compile(
        r"""
        ^ [ \t]* &(\S+) [ \t]* $\n  # match line w/ nmlst tag; save nmlst name
        (
         [\S\s]*?                # match any line non-greedily
        )                        # save the group of text between nmlst
        ^ [ \t]* / [ \t]* $\n    # match line w/ "/" as only non-whitespace char
        """, re.M | re.X)
    # Regex extracting every "key = value" pair inside a namelist body.
    key_value_re = re.compile(
        r"""
        [ \t]* (\S+?) [ \t]*  # match and store key
        =               # equals sign separates key and value
        [ \t]* (\S+?) [ \t]*  # match and store value
        [\n,]           # return or comma separates "key = value" pairs
        """, re.M | re.X)

    # Build the nested dict: namelist name (upper) -> {key (lower): value}.
    params_dict = {}
    for name, body in namelist_re.findall(txt):
        pairs = {
            key.lower(): str2val(valstr)
            for key, valstr in key_value_re.findall(body)
        }
        if pairs:
            params_dict[name.upper()] = pairs

    if not params_dict:
        raise ParsingError(
            'No data was found while parsing the namelist in the following '
            'text\n' + txt)
    return params_dict
Example #21
0
def parse_upf(fname, check_filename=True):
    """
    Try to get relevant information from the UPF. For the moment, only the
    element name. Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.
    """
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.common import aiidalogger
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.data.structure import _valid_symbols

    parsed_data = {}

    with open(fname) as handle:
        # the UPF version is declared on the first line
        match = _upfversion_regexp.match(handle.readline().strip())
        if match:
            version = match.group('version')
            aiidalogger.debug("Version found: {} for file {}".format(
                version, fname))
        else:
            aiidalogger.debug("Assuming version 1 for file {}".format(fname))
            version = "1"

        parsed_data['version'] = version
        try:
            version_major = int(version.partition('.')[0])
        except ValueError:
            # If the version string does not start with a dot, fallback
            # to version 1
            aiidalogger.debug("Falling back to version 1 for file {}, "
                              "version string '{}' unrecognized".format(
                                  fname, version))
            version_major = 1

        # the element is declared with different syntax in v1 and v2+
        element_regexp = (_element_v1_regexp
                          if version_major == 1 else _element_v2_regexp)
        element = None
        for line in handle:
            match = element_regexp.match(line.strip())
            if match:
                element = match.group('element_name')
                break

        if element is None:
            raise ParsingError(
                "Unable to find the element of UPF {}".format(fname))
        element = element.capitalize()
        if element not in _valid_symbols:
            raise ParsingError("Unknown element symbol {} for file {}".format(
                element, fname))
        if check_filename and not os.path.basename(fname).lower().startswith(
                element.lower()):
            raise ParsingError("Filename {0} was recognized for element "
                               "{1}, but the filename does not start "
                               "with {1}".format(fname, element))

        parsed_data['element'] = element

    return parsed_data
Example #22
0
    def parse_djrepos_from_folder(cls, dirpath, pseudo_type):
        # pylint: disable=too-many-locals,too-many-branches
        """Parse the djrepo files in the given directory into md5 and cutoff mappings.

        .. note:: The directory pointed to by `dirpath` should only contain djrepo files. Optionally, it can contain
            just a single directory, that contains all the djrepo files. If any other files are stored in the basepath
            or the subdirectory that cannot be successfully parsed as djrepo files the method will raise a `ValueError`.

        :param dirpath: absolute path to a directory containing djrepos.
        :param pseudo_type: the pseudo data class, used to select the appropriate md5 keys and cutoff duals.
        :return: tuple ``(md5s, cutoffs)``: md5s keyed by element, and cutoffs keyed by stringency then element.
        :raises ValueError: if `dirpath` is not a directory or contains anything other than files.
        :raises ValueError: if `dirpath` contains multiple djrepos for the same element.
        :raises ParsingError: if one of the files in `dirpath` cannot be parsed as a djrepo.
        """
        md5s = {}
        cutoffs = {'low': {}, 'normal': {}, 'high': {}}
        elements = []

        if not os.path.isdir(dirpath):
            raise ValueError(f'`{dirpath}` is not a directory')

        dirpath_contents = os.listdir(dirpath)

        # If the directory contains a single subdirectory, descend into it.
        if len(dirpath_contents) == 1 and os.path.isdir(os.path.join(dirpath, dirpath_contents[0])):
            dirpath = os.path.join(dirpath, dirpath_contents[0])

        for filename in os.listdir(dirpath):
            filepath = os.path.join(dirpath, filename)

            if not os.path.isfile(filepath):
                raise ValueError(f'dirpath `{dirpath}` contains at least one entry that is not a file')

            # Some of the djrepo archives contain extraneous files. Here we skip files with unsupported extensions.
            if filename.split('.')[-1] not in cls._pseudo_repo_file_extensions:
                warnings.warn(f'filename `{filename}` does not have a supported extension. Skipping...')
                continue

            try:
                with open(filepath, 'r') as handle:
                    djrepo = json.load(handle)
            except json.JSONDecodeError as exception:
                # BUGFIX: `json.load` raises `JSONDecodeError`, not `ParsingError`,
                # so the previous `except ParsingError` could never trigger.
                raise ParsingError(f'failed to parse `{filepath}`: {exception}') from exception

            match = re.search(r'^([A-Za-z]{1,2})\.\w+', filename)
            if match is None:
                raise ParsingError(
                    f'could not parse a valid element symbol from the filename `{filename}`. '
                    'It should have the format `ELEMENT.EXTENSION`'
                )
            element = match.group(1)
            if element in elements:
                raise ValueError(f'directory `{dirpath}` contains djrepos with duplicate elements')

            try:
                md5 = cls.get_md5_from_djrepo(djrepo, pseudo_type=pseudo_type)
            except (ParsingError, ValueError) as exception:
                raise ParsingError(f'failed to parse md5 from djrepo file `{filepath}`: {exception}') from exception

            md5s[element] = md5

            try:
                djrepo_cutoffs = cls.get_cutoffs_from_djrepo(djrepo, pseudo_type=pseudo_type)
            except ParsingError as exception:
                raise ParsingError(
                    f'failed to parse cutoffs from djrepo file `{filepath}`: {exception}'
                ) from exception

            for stringency in ['low', 'normal', 'high']:
                cutoffs[stringency][element] = djrepo_cutoffs[stringency]

            elements.append(element)

        if (not cutoffs['low']) and (not cutoffs['normal']) and (not cutoffs['high']):
            raise ValueError(f'no djrepos were parsed from `{dirpath}`')

        return md5s, cutoffs
Example #23
0
def parse_upf(fname, check_filename=True):
    """
    Try to get relevant information from the UPF. For the moment, only the
    element name. Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    If check_filename is True, raise a ParsingError exception if the filename
    does not start with the element name.

    :param fname: path to a UPF file, or an open filelike object with a ``name``
    :param check_filename: whether to verify the filename starts with the element
    :return: dict with keys ``version`` and ``element``
    :raises aiida.common.exceptions.ParsingError: if the element cannot be
        determined, is unknown, or does not match the filename
    """
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.common import AIIDA_LOGGER
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    # Accept either an open filelike object (read it, then use its name for
    # messages) or a plain path on disk.
    try:
        upf_contents = fname.read()
        fname = fname.name
    except AttributeError:
        with io.open(fname, encoding='utf8') as handle:
            upf_contents = handle.read()

    # Determine the UPF format version; default to '1' when no version tag
    # is present (old-style UPF files carry none).
    version_match = REGEX_UPF_VERSION.search(upf_contents)
    if version_match is None:
        AIIDA_LOGGER.debug('Assuming version 1 for file {}'.format(fname))
        version = '1'
    else:
        version = version_match.group('version')
        AIIDA_LOGGER.debug('Version found: {} for file {}'.format(
            version, fname))

    parsed_data['version'] = version
    try:
        version_major = int(version.partition('.')[0])
    except ValueError:
        # A version string without a leading integer is unrecognized;
        # fall back to version 1.
        AIIDA_LOGGER.debug('Falling back to version 1 for file {}, '
                           "version string '{}' unrecognized".format(
                               fname, version))
        version_major = 1

    # The element is stored differently in v1 and v2+ files, so pick the
    # matching regular expression before searching.
    element_regex = REGEX_ELEMENT_V1 if version_major == 1 else REGEX_ELEMENT_V2
    element_match = element_regex.search(upf_contents)
    element = element_match.group('element_name') if element_match else None

    if element is None:
        raise ParsingError(
            'Unable to find the element of UPF {}'.format(fname))
    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError('Unknown element symbol {} for file {}'.format(
            element, fname))
    if check_filename and not os.path.basename(fname).lower().startswith(
            element.lower()):
        raise ParsingError('Filename {0} was recognized for element '
                           '{1}, but the filename does not start '
                           'with {1}'.format(fname, element))

    parsed_data['element'] = element

    return parsed_data
Example #24
0
def parse_cell_parameters(txt):
    """
    Return dict containing info from the CELL_PARAMETERS card block in txt.

    .. note:: This card is only needed if ibrav = 0. Therefore, if the card is
           not present, the function will return None and not raise an error.

    .. note:: If the units are unspecified, they will be returned as None. The
           units interpreted by QE depend on whether or not one of 'celldm(1)'
           or 'a' is set in &SYSTEM.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary (if CELL_PARAMETERS is present; else: None) with

            * units: the units of the lattice vectors (always lower-case) or
              None
            * cell: 3x3 list with lattice vectors as rows

        For example: ::

            {'units': 'angstrom',
             'cell': [[16.9, 0.0, 0.0],
                      [-2.6, 8.0, 0.0],
                      [-2.6, -3.5, 7.2]]}

    :rtype: dict or None
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the card block.
    cell_parameters_block_re = re.compile(
        r"""
        ^ [ \t]* CELL_PARAMETERS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         ){3}
        )
        """, RE_FLAGS)
    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block and extract units and the lines of the block.
    match = cell_parameters_block_re.search(txt)
    if not match:
        return None
    # Use specified units or None if not specified.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    if match.group('block') is None:
        raise ParsingError(
            'The CELL_PARAMETER card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')
    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Now, extract the lattice vectors.
    # NOTE: the values must be materialized into a list; in Python 3 a bare
    # ``map`` object would be stored instead of the documented 3x3 list of
    # floats, and it would be exhausted after the first iteration.
    lattice_vectors = []
    for match in atomic_species_re.finditer(blockstr):
        lattice_vectors.append([fortfloat(value) for value in match.groups()])
    info_dict = dict(units=units, cell=lattice_vectors)
    return info_dict
Example #25
0
def parse_atomic_positions(txt):
    """
    Return a dictionary containing info from the ATOMIC_POSITIONS card block
    in txt.

    .. note:: If the units are unspecified, they will be returned as None.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * units: the units of the positions (always lower-case) or None
            * names: list of the atom names (e.g. ``'Si'``, ``'Si0'``,
              ``'Si_0'``)
            * positions: list of the [x, y, z] positions
            * fixed_coords: list of [x, y, z] (bools) of the force modifications
              (**Note:** True <--> Fixed, as defined in the
              ``BasePwCpInputGenerator._if_pos`` method)

        For example: ::

            {'units': 'bohr',
             'names': ['C', 'O'],
             'positions': [[0.0, 0.0, 0.0],
                           [0.0, 0.0, 2.5]]
             'fixed_coords': [[False, False, False],
                              [True, True, True]]}


    :rtype: dictionary
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the card block.
    # NOTE: This will match card block lines w/ or w/out force modifications.
    atomic_positions_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_POSITIONS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]*
          (?:
           \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+
           (?:[ \t]+ [{(]? [ \t]* [01] [ \t]+ [01] [ \t]+ [01] [ \t]* [)}]?)?
          )
          [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for atomic positions without force modifications.
    atomic_positions_re = re.compile(
        r"""
        ^ [ \t]*
        (?P<name>\S+) [ \t]+ (?P<x>\S+) [ \t]+ (?P<y>\S+) [ \t]+ (?P<z>\S+)
            [ \t]* $\n?
        """, RE_FLAGS)
    # Define re for atomic positions with force modifications.
    atomic_positions_constraints_re = re.compile(
        r"""
        ^ [ \t]*
        (?P<name>\S+) [ \t]+ (?P<x>\S+) [ \t]+ (?P<y>\S+) [ \t]+ (?P<z>\S+)
            [ \t]+ [{(]? [ \t]* (?P<if_pos1>[01]) [ \t]+ (?P<if_pos2>[01])
            [ \t]+ (?P<if_pos3>[01]) [ \t]* [)}]?
        [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block and extract units and the lines of the block.
    match = atomic_positions_block_re.search(txt)
    if not match:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was not found in\n' + txt)
    # Get the units. If they are not found, match.group('units') will be None.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    if match.group('block') is None:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')

    # Define a small helper function to convert if_pos strings to bools that
    # correspond to the mapping of BasePwCpInputGenerator._if_pos method.
    def str01_to_bool(s):
        """
        Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs to
        be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        if s == '0':
            return True
        elif s == '1':
            return False
        else:
            raise ParsingError(
                'Unable to convert if_pos = {} to bool'.format(s))

    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Parse the lines of the card block, extracting an atom name, position
    # and fixed coordinates.
    # NOTE: all parsed values are materialized into lists; in Python 3 a bare
    # ``map`` object would be stored instead of the documented [x, y, z]
    # lists, and it would be exhausted after the first iteration.
    names, positions, fixed_coords = [], [], []
    # First, try using the re for lines without force modifications. Set the
    # default force modification to the default (True) for each atom.
    for match in atomic_positions_re.finditer(blockstr):
        names.append(match.group('name'))
        positions.append([fortfloat(value) for value in match.group('x', 'y', 'z')])
        fixed_coords.append(3 * [False])  # False <--> not fixed (the default)
    # Next, try using the re for lines with force modifications.
    for match in atomic_positions_constraints_re.finditer(blockstr):
        names.append(match.group('name'))
        positions.append([fortfloat(value) for value in match.group('x', 'y', 'z')])
        if_pos123 = match.group('if_pos1', 'if_pos2', 'if_pos3')
        fixed_coords.append([str01_to_bool(value) for value in if_pos123])
    # Check that the number of atomic positions parsed is equal to the number of
    # lines in blockstr
    n_lines = len(blockstr.rstrip().split('\n'))
    if len(names) != n_lines:
        raise ParsingError(
            'Only {} atomic positions were parsed from the {} lines of the '
            'ATOMIC_POSITIONS card block:\n{}'.format(len(names), n_lines,
                                                      blockstr))
    info_dict = dict(units=units,
                     names=names,
                     positions=positions,
                     fixed_coords=fixed_coords)
    return info_dict
Example #26
0
def parse_basis(fname):
    """get relevant information from the basis file

    :param fname: the file path
    :return: (metadata_dict, content_str)

    - The basis file must contain one basis set in the CRYSTAL17 format
    - blank lines and lines beginning '#' will be ignored
    - the file can also start with a fenced (with ---), yaml formatted header section
        - Note keys should not contain '.'s

    Example

    ::

        # an ignored comment
        ---
        author: J Smith
        year: 1999
        ---
        8 2
        1 0 3  2.  0.
        1 1 3  6.  0.

    :raises aiida.common.exceptions.ParsingError: if the header is not a
        mapping, contains a reserved key, or the file has no basis content
    """
    from aiida.common.exceptions import ParsingError
    meta_data = {}

    in_yaml = False
    yaml_lines = []
    # keys that are derived from the basis content itself and therefore may
    # not be overridden by the yaml header
    protected_keys = [
        "atomic_number", "num_shells", "element", "basis_type", "content"
    ]
    parsing_data = False
    content = []
    with open(fname) as f:
        for line in f:
            # ignore commented and blank lines
            if line.strip().startswith("#") or not line.strip():
                continue
            if line.strip() == "---" and not parsing_data:
                if not in_yaml:
                    in_yaml = True
                    continue
                else:
                    # Use safe_load: the file content is untrusted input and
                    # plain yaml.load can execute arbitrary constructors
                    # (it is also deprecated without an explicit Loader).
                    head_data = yaml.safe_load("".join(yaml_lines))
                    head_data = {} if not head_data else head_data
                    if not isinstance(head_data, dict):
                        raise ParsingError(
                            "the header data could not be read for file: {}".
                            format(fname))
                    if set(head_data.keys()).intersection(protected_keys):
                        raise ParsingError(
                            "the header data contained a forbidden key(s) {} for file: {}"
                            .format(protected_keys, fname))
                    meta_data = head_data
                    in_yaml = False
                    parsing_data = True
                    continue
            if in_yaml:
                yaml_lines.append(line)
                continue

            parsing_data = True

            # the first content line carries the atomic id and shell count
            if not content:
                atomic_number, basis_type, num_shells, line = _parse_first_line(
                    line, fname)

                meta_data["atomic_number"] = atomic_number
                meta_data["element"] = ATOMIC_NUM2SYMBOL[atomic_number]
                meta_data["basis_type"] = basis_type
                meta_data["num_shells"] = num_shells

            content.append(line)

    if not content:
        raise ParsingError(
            "The basis set file contains no content: {}".format(fname))

    validate_basis_string("".join(content))

    return meta_data, "".join(content)
Example #27
0
def parse_k_points(txt):
    """
    Return a dictionary containing info from the K_POINTS card block in txt.

    .. note:: If the type of kpoints (where type = x in the card header,
           "K_POINTS x") is not present, type will be returned as 'tpiba', the
           QE default.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary containing

            * type: the type of kpoints (always lower-case)
            * points: an Nx3 list of the kpoints (will not be present if type =
              'gamma' or type = 'automatic')
            * weights: a 1xN list of the kpoint weights (will not be present if
              type = 'gamma' or type = 'automatic')
            * mesh: a 1x3 list of the number of equally-spaced points in each
              direction of the Brillouin zone, as in Monkhorst-Pack grids (only
              present if type = 'automatic')
            * offset: a 1x3 list of the grid offsets in each direction of the
              Brillouin zone (only present if type = 'automatic')
              (**Note:** The offset value for each direction will be *one of*
              ``0.0`` [no offset] *or* ``0.5`` [offset by half a grid step].
              This differs from the Quantum Espresso convention, where an offset
              value of ``1`` corresponds to a half-grid-step offset, but adheres
              to the current AiiDa convention.


        Examples: ::

            {'type': 'crystal',
             'points': [[0.125,  0.125,  0.0],
                        [0.125,  0.375,  0.0],
                        [0.375,  0.375,  0.0]],
             'weights': [1.0, 2.0, 1.0]}

            {'type': 'automatic',
             'points': [8, 8, 8],
             'offset': [0.0, 0.5, 0.0]}

            {'type': 'gamma'}

    :rtype: dictionary
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the special-type card block.
    k_points_special_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]*
            [{(]? [ \t]* (?P<type>\S+?)? [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* \S+ [ \t]* $\n  # nks
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the special-type block.
    k_points_special_re = re.compile(
        r"""
    ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
    """, RE_FLAGS)
    # Define re for the automatic-type card block and its line of info.
    k_points_automatic_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* automatic [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+)
            [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)
    # Define re for the gamma-type card block. (There is no block info.)
    k_points_gamma_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* gamma [ \t]* [)}]? [ \t]* $\n
        """, RE_FLAGS)
    # Try finding the card block using all three types.
    # NOTE: parsed points are materialized into lists; in Python 3 a bare
    # ``map`` object would be stored instead of the documented Nx3 / 1x3
    # lists, and it would be exhausted after the first iteration.
    info_dict = {}
    match = k_points_special_block_re.search(txt)
    if match:
        if match.group('type') is not None:
            info_dict['type'] = match.group('type').lower()
        else:
            info_dict['type'] = 'tpiba'
        blockstr = match.group('block')
        points = []
        weights = []
        for match in k_points_special_re.finditer(blockstr):
            points.append([float(value) for value in match.group(1, 2, 3)])
            weights.append(float(match.group(4)))
        info_dict['points'] = points
        info_dict['weights'] = weights
    else:
        match = k_points_automatic_block_re.search(txt)
        if match:
            info_dict['type'] = 'automatic'
            info_dict['points'] = [int(value) for value in match.group(1, 2, 3)]
            info_dict['offset'] = [
                0. if x == 0 else 0.5 for x in map(int, match.group(4, 5, 6))
            ]
        else:
            match = k_points_gamma_block_re.search(txt)
            if match:
                info_dict['type'] = 'gamma'
            else:
                raise ParsingError('K_POINTS card not found in\n' + txt)
    return info_dict
Example #28
0
def parse_atomic_species(txt):
    """
    Return a dictionary containing info from the ATOMIC_SPECIES card block
    in txt.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * names: list of the atom names (e.g. 'Si', 'Si0', 'Si_0') (case
              as-is)
            * masses: list of the masses of the atoms in 'names'
            * pseudo_file_names: list of the pseudopotential file names for the
              atoms in 'names' (case as-is)

        Example: ::

            {'names': ['Li', 'O', 'Al', 'Si'],
             'masses': [6.941,  15.9994, 26.98154, 28.0855],
             'pseudo_file_names': ['Li.pbe-sl-rrkjus_psl.1.0.0.UPF',
                                   'O.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Al.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Si3 28.0855 Si.pbe-nl-rrkjus_psl.1.0.0.UPF']

    :rtype: dictionary
    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for atomic species card block.
    atomic_species_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_SPECIES [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (?P<name>\S+) [ \t]+ (?P<mass>\S+) [ \t]+ (?P<pseudo>\S+)
            [ \t]* $\n?
        """, RE_FLAGS)
    # Find the card block and extract units and the lines of the block.
    # NOTE: ``re.search`` returns None when nothing matches (it never raises
    # AttributeError), so the missing-card case must be checked explicitly;
    # the previous try/except around search() could never fire and let
    # ``match.group`` fail with an uncaught AttributeError instead.
    match = atomic_species_block_re.search(txt)
    if match is None:
        raise ParsingError('The ATOMIC_SPECIES card block was not found in\n' +
                           txt)
    # Make sure the card block lines were extracted. If they were, store the
    # string of lines as blockstr.
    if match.group('block') is None:
        raise ParsingError(
            'The ATOMIC_SPECIES card block was parsed as empty in\n' + txt)
    else:
        blockstr = match.group('block')
    # Define a small helper function to convert strings of fortran-type floats.
    fortfloat = lambda s: float(s.replace('d', 'e').replace('D', 'E'))
    # Now, extract the name, mass, and pseudopotential file name from each line
    # of the card block.
    names, masses, pseudo_fnms = [], [], []
    for match in atomic_species_re.finditer(blockstr):
        names.append(match.group('name'))
        masses.append(fortfloat(match.group('mass')))
        pseudo_fnms.append(match.group('pseudo'))
    info_dict = dict(names=names, masses=masses, pseudo_file_names=pseudo_fnms)
    return info_dict
Example #29
0
def parse_basis(basis_file):
    """Get relevant information from the basis file.

    :param basis_file: absolute path to a file or open filelike object
    :return: (metadata_dict, content_str)

    - The basis file must contain one basis set in the CRYSTAL17 format
    - blank lines and lines beginning '#' will be ignored
    - the file can also start with a fenced (with ---),
      yaml formatted header section
      (Note keys should not contain '.'s)

    Example

    ::

        # an ignored comment
        ---
        author: J Smith
        year: 1999
        ---
        8 2
        1 0 3  2.  0.
        1 1 3  6.  0.

    :raises ParsingError: if the header is not a mapping, contains a reserved
        key, the file contains more than one basis set, or the first content
        line is malformed
    """
    meta_data = {}

    in_yaml = False
    yaml_lines = []
    # keys derived from the basis content itself, which the yaml header may
    # therefore not override
    protected_keys = ["atomic_number", "num_shells", "element", "basis_type", "content"]
    parsing_data = False
    content = []

    if isinstance(basis_file, str):
        basis_file = pathlib.Path(basis_file)

    if isinstance(basis_file, pathlib.Path):
        contentlines = basis_file.read_text().splitlines()
        basis_file_name = basis_file.name
    else:
        # filelike object: rewind, since it may have been read before
        basis_file.seek(0)
        contentlines = basis_file.read().splitlines()
        try:
            basis_file_name = basis_file.name
        except AttributeError:
            basis_file_name = "StringIO"

    for line in contentlines:
        # ignore commented and blank lines
        if line.strip().startswith("#") or not line.strip():
            continue
        if line.strip() == "---" and not parsing_data:
            if not in_yaml:
                in_yaml = True
                continue
            else:
                head_data = yaml.safe_load("\n".join(yaml_lines))
                head_data = {} if not head_data else head_data
                if not isinstance(head_data, dict):
                    raise ParsingError(
                        "the header data could not be read for file: {}".format(
                            basis_file_name
                        )
                    )
                if set(head_data.keys()).intersection(protected_keys):
                    raise ParsingError(
                        "the header data contained a forbidden key(s) "
                        "{} for file: {}".format(protected_keys, basis_file_name)
                    )
                meta_data = head_data
                in_yaml = False
                parsing_data = True
                continue
        if in_yaml:
            yaml_lines.append(line)
            continue

        parsing_data = True

        content.append(line.strip())

    data = parse_bsets_stdin("\n".join(content), isolated=True)
    if len(data) > 1:
        raise ParsingError(
            "the basis set string contains more than one basis set: {}".format(
                list(data.keys())
            )
        )
    atomic_symbol = list(data.keys())[0]

    meta_data["atomic_number"] = atomic_number = SYMBOLS_R[atomic_symbol]
    meta_data["element"] = atomic_symbol
    meta_data["basis_type"] = basis_type = data[atomic_symbol]["type"]
    meta_data["num_shells"] = num_shells = len(data[atomic_symbol]["bs"])
    meta_data["orbital_types"] = [o["type"] for o in data[atomic_symbol]["bs"]]

    # the input atomic number may be > 100, but we should standardise this in the stored file
    first_line = content[0].strip().split()
    if len(first_line) != 2 or first_line[1] != str(num_shells):
        # report the offending first line (``content[0]``), not the stale
        # ``line`` loop variable which holds the last line of the file
        raise ParsingError(
            "The first line should contain only the atomic id and num shells ({}): '{}' for file {}".format(
                num_shells, content[0], basis_file_name
            )
        )
    newline = "{0} {1}".format(
        atomic_number if basis_type == "all-electron" else 200 + atomic_number,
        num_shells,
    )
    content[0] = newline

    return meta_data, "\n".join(content)
Example #30
0
    def get_structuredata(self):
        """
        Return a StructureData object based on the data in the input file.

        This uses all of the data in the input file to do the necessary unit
        conversion, etc. and then creates an AiiDa StructureData object.

        All of the names corresponding of the Kind objects composing the
        StructureData object will match those found in the ATOMIC_SPECIES
        block, so the pseudopotentials can be linked to the calculation using
        the kind.name for each specific type of atom (in the event that you
        wish to use different pseudo's for two or more of the same atom).

        :return: StructureData object of the structure in the input file
        :rtype: aiida.orm.data.structure.StructureData
        :raises aiida.common.exceptions.ParsingError: if there are issues
            parsing the input.
        """
        # CELL_PARAMETERS are present.
        if self.cell_parameters is None:
            raise ParsingError(
                'CELL_PARAMETERS not found while parsing the input file. This '
                'card is needed for AiiDa.')

        # Figure out the factor needed to convert the lattice vectors
        # to Angstroms.
        # TODO: ***ASK GEORGE IF I SHOULD MULTIPLY OR DIVIDE BY ALAT***
        cell_units = self.cell_parameters.get('units')
        if (cell_units == 'alat') or (cell_units is None):
            # Try to determine the value of alat from the namelist.
            celldm1 = self.namelists['SYSTEM'].get('celldm(1)')
            a = self.namelists['SYSTEM'].get('a')
            # Only one of 'celldm(1)' or 'a' can be set.
            if (celldm1 is not None) and (a is not None):
                raise ParsingError(
                    "Both 'celldm(1)' and 'a' were set in the input file.")
            elif celldm1 is not None:
                cell_conv_factor = celldm1 * bohr_to_ang  # celldm(1) in Bohr
            elif a is not None:
                cell_conv_factor = a  # a is in Angstroms
            else:
                if cell_units is None:
                    cell_conv_factor = bohr_to_ang  # QE assumes Bohr
                else:
                    raise ParsingError(
                        "Unable to determine the units of the lattice vectors."
                    )
        elif cell_units == 'bohr':
            cell_conv_factor = bohr_to_ang
        elif cell_units == 'angstrom':
            cell_conv_factor = 1.0
        else:
            raise ParsingError(
                "Unable to determine the units of the lattice vectors.")

        # Get the lattice vectors and convert them to units of Angstroms.
        cell = np.array(self.cell_parameters['cell']) * cell_conv_factor

        # Get the positions and convert them to [x, y, z] Angstrom vectors.
        pos_units = self.atomic_positions['units']
        positions = np.array(self.atomic_positions['positions'])
        if pos_units in (None, 'alat'):  # QE assumes alat
            alat = np.linalg.norm(cell[0])  # Cell in Ang, so alat in Ang
            positions *= alat
        elif pos_units == 'bohr':
            positions = positions * bohr_to_ang
        elif pos_units == 'angstrom':
            pass
        elif pos_units == 'crystal':
            positions = np.dot(positions, cell)  # rotate into [x y z] basis
        else:
            raise ParsingError(
                'Unable to determine to convert positions to [x y z] Angstrom.'
            )

        # Get the atom names corresponding to positions.
        names = self.atomic_positions['names']

        # Create a dictionary that maps an atom name to it's mass.
        mass_dict = dict(
            zip(self.atomic_species['names'], self.atomic_species['masses']))

        # Use the names to figure out the atomic symbols.
        symbols = []
        for name in names:
            candidates = [
                s for s in _valid_symbols if name.lower().startswith(s.lower())
            ]
            if len(candidates) == 0:
                raise ParsingError(
                    'Unable to figure out the element represented by the '
                    'label, {}, in the input file.'.format(name))
            # Choose the longest match, since, for example, S and Si match Si.
            symbols.append(max(candidates, key=len))

        # Now that we have the names and their corresponding symbol and mass, as
        # well as the positions and cell in units of Angstroms, we create the
        # StructureData object.
        structuredata = StructureData(cell=cell)
        for name, symbol, position in zip(names, symbols, positions):
            mass = mass_dict[name]
            structuredata.append_atom(name=name,
                                      symbols=symbol,
                                      position=position,
                                      mass=mass)
        return structuredata