Python clean_lines Examples, pymatgen.util.io_utils.clean_lines Python Examples

Example #1

0

Show file

    def from_string(cls, string: str):
        """Build an instance of ``cls`` from the text of an INCAR file.

        Every cleaned line is additionally split on ";" so that several
        ``KEY = value`` flags written on a single line are each parsed.

        Args:
            string (str): Incar string

        Returns:
            ``cls`` instance constructed from the parsed flag dictionary.
        """
        flag_pattern = r'(\w+)\s*=\s*(.*)'
        parsed = {}
        for raw_line in list(clean_lines(string.splitlines())):
            # One physical line may carry several ";"-separated flags.
            for fragment in raw_line.split(";"):
                found = re.match(flag_pattern, fragment)
                if found is None:
                    continue
                name = found.group(1).strip()
                text = found.group(2).strip()
                parsed[name] = ViseIncar.proc_val(name, text)

        return cls(parsed)

Example #2

0

Show file

File: inputs.py Project: htz1992213/pymatgen-1

    def from_string(header_str):
        """
        Reads Header string and returns Header object if header was
        generated by pymatgen.
        Note: Checks to see if generated by pymatgen, if not it is impossible
            to generate structure object so it is not possible to generate
            header object and routine ends

        Args:
            header_str: pymatgen generated feff.inp header

        Returns:
            Header object built from the parsed structure, source and
            comment.

        Raises:
            ValueError: If the first header line does not contain
                "pymatgen".
        """
        lines = tuple(clean_lines(header_str.split("\n"), False))
        comment1 = lines[0]
        # Membership test instead of str.find(): find() yields 0 (falsy) when
        # the marker sits at the very start of the line, which would wrongly
        # reject a valid header.
        if "pymatgen" not in comment1:
            raise ValueError(
                "Header not generated by pymatgen, cannot return header object")

        comment2 = " ".join(lines[1].split()[2:])
        source = " ".join(lines[2].split()[2:])

        # a, b, c
        basis_vec = lines[6].split(":")[-1].split()
        lengths = [float(basis_vec[i]) for i in range(3)]
        # alpha, beta, gamma
        basis_ang = lines[7].split(":")[-1].split()
        angles = [float(basis_ang[i]) for i in range(3)]

        lattice = Lattice.from_parameters(*lengths, *angles)

        natoms = int(lines[8].split(":")[-1].split()[0])

        # Atom records start at line index 9. Token 2 is the symbol and the
        # tokens from index 3 onwards are the coordinates; read both in a
        # single pass over the records.
        atomic_symbols = []
        coords = []
        for i in range(9, 9 + natoms):
            toks = lines[i].split()
            atomic_symbols.append(toks[2])
            coords.append([float(s) for s in toks[3:]])

        struct = Structure(lattice, atomic_symbols, coords, False, False,
                           False)

        return Header(struct, source, comment2)

Example #3

0

Show file

File: inputs.py Project: adozier/pymatgen

    def from_string(header_str):
        """
        Reads Header string and returns Header object if header was
        generated by pymatgen.
        Note: Checks to see if generated by pymatgen, if not it is impossible
            to generate structure object so it is not possible to generate
            header object and routine ends

        Args:
            header_str: pymatgen generated feff.inp header

        Returns:
            Header object when the first line contains "pymatgen"; otherwise
            an explanatory message string (kept for backward compatibility
            with callers that inspect the return value).
        """
        lines = tuple(clean_lines(header_str.split("\n"), False))
        comment1 = lines[0]

        # str.find() returns -1 (truthy) when the marker is absent and 0
        # (falsy) when it is at the start of the line, so its result must not
        # be used as a boolean. Test membership explicitly instead.
        if "pymatgen" not in comment1:
            return "Header not generated by pymatgen, cannot return header object"

        comment2 = ' '.join(lines[1].split()[2:])
        source = ' '.join(lines[2].split()[2:])

        # a, b, c
        basis_vec = lines[6].split(":")[-1].split()
        lengths = [float(basis_vec[i]) for i in range(3)]
        # alpha, beta, gamma
        basis_ang = lines[7].split(":")[-1].split()
        angles = [float(basis_ang[i]) for i in range(3)]

        lattice = Lattice.from_lengths_and_angles(lengths, angles)

        natoms = int(lines[8].split(":")[-1].split()[0])

        # Atom records start at line index 9. Token 2 is the symbol and the
        # tokens from index 3 onwards are the coordinates.
        atomic_symbols = []
        coords = []
        for i in range(9, 9 + natoms):
            toks = lines[i].split()
            atomic_symbols.append(toks[2])
            coords.append([float(s) for s in toks[3:]])

        struct = Structure(lattice, atomic_symbols, coords, False,
                           False, False)

        return Header(struct, source, comment2)

Example #4

0

Show file

File: feffio.py Project: jesuansito/pymatgen

    def from_string(header_str):
        """
        Reads Header string and returns Header object if header was
        generated by pymatgen.

        Args:
            header_str:
                pymatgen generated feff.inp header

        Returns:
            Header object when the first line contains "pymatgen"; otherwise
            an explanatory message string.
        """
        lines = tuple(clean_lines(header_str.split("\n"), False))
        comment = lines[0]

        # Membership test rather than ``find(...) > 0`` so that a marker at
        # the very start of the line (index 0) is also recognised.
        if "pymatgen" not in comment:
            return "Header not generated by pymatgen, " \
                   "cannot return header object"

        source = lines[1].split()[2]
        natoms = int(lines[7].split()[2])

        # Lattice lengths a, b, c are tokens 2-4 of line 5.
        basis_vec = lines[5].split()
        lengths = [float(basis_vec[i]) for i in (2, 3, 4)]

        # Lattice angles alpha, beta, gamma are tokens 2-4 of line 6.
        basis_ang = lines[6].split()
        angles = [float(basis_ang[i]) for i in (2, 3, 4)]

        lattice = Lattice.from_lengths_and_angles(lengths, angles)

        # Atom records start at line index 8. ``range`` is used instead of
        # ``xrange`` so the loop runs on both Python 2 and Python 3.
        atomic_symbols = []
        coords = []
        for i in range(8, 8 + natoms):
            toks = lines[i].split()
            atomic_symbols.append(toks[2])
            coords.append([float(s) for s in toks[3:]])

        struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                    False, False)
        struct_fromfile.compound = lines[3].split()[2]

        h = Header(struct_fromfile, comment)
        h.set_source(source)

        return h

Example #5

0

Show file

File: inputs.py Project: htz1992213/pymatgen-1

    def from_file(filename="feff.inp"):
        """
        Build a Tags object from a PARAMETER or feff.inp file.

        Lines are matched against ``<UPPERCASE NAME><digits> <value>``. The
        ATOMS, POTENTIALS, END and TITLE cards are ignored. An ELNES or
        EXELFS card opens a window of following lines that are collected
        verbatim and stored as a nested dictionary under the card name.

        Args:
            filename: Filename for either PARAMETER or feff.inp file

        Returns:
            Feff_tag object
        """
        with zopen(filename, "rt") as handle:
            lines = list(clean_lines(handle.readlines()))

        skipped_cards = ("ATOMS", "POTENTIALS", "END", "TITLE")
        eels_cards = ["ELNES", "EXELFS"]

        params = {}
        eels_params = []
        ieels = -1
        ieels_max = -1
        for idx, text in enumerate(lines):
            found = re.match(r"([A-Z]+\d*\d*)\s*(.*)", text)
            if found:
                key = found.group(1).strip()
                val = Tags.proc_val(key, found.group(2).strip())
                if key in eels_cards:
                    # Start of an EELS block: the card line plus up to five
                    # following lines belong to it.
                    ieels = idx
                    ieels_max = idx + 5
                elif key not in skipped_cards:
                    params[key] = val

            if ieels >= 0 and ieels <= idx <= ieels_max:
                # The line right after the card shortens the block by one
                # line when its second token equals 1.
                if idx == ieels + 1 and int(text.split()[1]) == 1:
                    ieels_max -= 1
                eels_params.append(text)

        if eels_params:
            eels_keys = ["BEAM_ENERGY", "ANGLES", "MESH", "POSITION"]
            if len(eels_params) == 6:
                eels_keys.insert(1, "BEAM_DIRECTION")

            card_tokens = eels_params[0].split()
            eels_dict = {"ENERGY": Tags._stringify_val(card_tokens[1:])}
            eels_dict.update(
                (k, str(v)) for k, v in zip(eels_keys, eels_params[1:])
            )
            params[str(card_tokens[0])] = eels_dict

        return Tags(params)

Example #6

0

Show file

File: inputs.py Project: ExpHP/pymatgen

    def from_file(filename="feff.inp"):
        """
        Parse a PARAMETER or feff.inp file into a Tags object.

        Ordinary cards are stored as ``name -> processed value``; ATOMS,
        POTENTIALS, END and TITLE are dropped. The lines belonging to an
        ELNES/EXELFS card are gathered separately and attached as a nested
        dictionary keyed by the card name.

        Args:
            filename: Filename for either PARAMETER or feff.inp file

        Returns:
            Feff_tag object
        """
        with zopen(filename, "rt") as stream:
            cleaned = list(clean_lines(stream.readlines()))

        tag_values = {}
        eels_lines = []
        eels_start = -1
        eels_stop = -1
        for lineno, content in enumerate(cleaned):
            hit = re.match(r"([A-Z]+\d*\d*)\s*(.*)", content)
            if hit:
                tag = hit.group(1).strip()
                value = Tags.proc_val(tag, hit.group(2).strip())
                if tag not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                    if tag in ["ELNES", "EXELFS"]:
                        eels_start = lineno
                        eels_stop = eels_start + 5
                    else:
                        tag_values[tag] = value

            # Nothing more to do unless this line lies inside the EELS window.
            if eels_start < 0 or not (eels_start <= lineno <= eels_stop):
                continue
            # The first line after the card may shrink the window by one.
            if lineno == eels_start + 1 and int(content.split()[1]) == 1:
                eels_stop -= 1
            eels_lines.append(content)

        if eels_lines:
            if len(eels_lines) == 6:
                eels_keys = ['BEAM_ENERGY', 'BEAM_DIRECTION', 'ANGLES',
                             'MESH', 'POSITION']
            else:
                eels_keys = ['BEAM_ENERGY', 'ANGLES', 'MESH', 'POSITION']
            first_tokens = eels_lines[0].split()
            eels_dict = {"ENERGY": Tags._stringify_val(first_tokens[1:])}
            for name, raw in zip(eels_keys, eels_lines[1:]):
                eels_dict[name] = str(raw)
            tag_values[str(first_tokens[0])] = eels_dict

        return Tags(tag_values)

Example #7

0

Show file

    def from_string(string):
        """
        Parse the text of an INCAR file into an Incar object.

        Only lines of the form ``KEY = value`` contribute; a later
        occurrence of a key overrides an earlier one.

        Args:
            string (str): Incar string

        Returns:
            Incar object
        """
        cleaned = list(clean_lines(string.splitlines()))
        settings = {}
        for text in cleaned:
            found = re.match(r'(\w+)\s*=\s*(.*)', text)
            if found is None:
                continue
            name = found.group(1).strip()
            raw_value = found.group(2).strip()
            settings[name] = Incar.proc_val(name, raw_value)
        return Incar(settings)

Example #8

0

Show file

File: vasp_input.py Project: qimin/pymatgen

    def from_file(filename):
        """
        Reads an Incar object from a file.

        Args:
            filename: Filename for file

        Returns:
            Incar object
        """
        with zopen(filename, "r") as f:
            lines = list(clean_lines(f.readlines()))
        params = {}
        for line in lines:
            # Raw string: "\w" and "\s" are invalid escape sequences in a
            # normal string literal and trigger warnings on newer Pythons.
            m = re.match(r"(\w+)\s*=\s*(.*)", line)
            if m:
                key = m.group(1).strip()
                val = m.group(2).strip()
                val = Incar.proc_val(key, val)
                params[key] = val
        return Incar(params)

Example #9

0

Show file

File: feffio.py Project: zacharygibbs/pymatgen

    def from_file(filename="feff.inp"):
        """
        Creates a Feff_tag dictionary from a PARAMETER or feff.inp file.

        The ATOMS, POTENTIALS, END and TITLE cards are not stored.

        Args:
            filename: Filename for either PARAMETER or feff.inp file

        Returns:
            Feff_tag object
        """
        with zopen(filename, "r") as f:
            lines = list(clean_lines(f.readlines()))
        params = {}
        for line in lines:
            # Raw string: "\d" and "\s" are invalid escape sequences in a
            # normal string literal and trigger warnings on newer Pythons.
            m = re.match(r"([A-Z]+\d*\d*)\s*(.*)", line)
            if m:
                key = m.group(1).strip()
                val = m.group(2).strip()
                val = FeffTags.proc_val(key, val)
                if key not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                    params[key] = val
        return FeffTags(params)

Example #10

0

Show file

File: inputs.py Project: adozier/pymatgen

    def from_file(filename="feff.inp"):
        """
        Creates a Feff_tag dictionary from a PARAMETER or feff.inp file.

        The ATOMS, POTENTIALS, END and TITLE cards are not stored.

        Args:
            filename: Filename for either PARAMETER or feff.inp file

        Returns:
            Feff_tag object
        """
        with zopen(filename, "rt") as f:
            lines = list(clean_lines(f.readlines()))
        params = {}
        for line in lines:
            # Raw string: "\d" and "\s" are invalid escape sequences in a
            # normal string literal and trigger warnings on newer Pythons.
            m = re.match(r"([A-Z]+\d*\d*)\s*(.*)", line)
            if m:
                key = m.group(1).strip()
                val = m.group(2).strip()
                val = Tags.proc_val(key, val)
                if key not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                    params[key] = val
        return Tags(params)

Example #11

0

Show file

    def from_string(string):
        """
        Reads an PWInput object from a string.

        The cleaned lines are walked once. Each line first updates the
        current parsing mode (namelist section, ATOMIC_SPECIES, K_POINTS,
        CELL_PARAMETERS / ATOMIC_POSITIONS) and is then interpreted
        according to that mode.

        Args:
            string (str): PWInput string

        Returns:
            PWInput object
        """
        lines = list(clean_lines(string.splitlines()))

        def input_mode(line):
            # Decide which parsing mode applies to ``line``. Lines that do
            # not start a new card or section fall through to the last
            # branch and keep the current ``mode`` read from the enclosing
            # scope.
            if line[0] == "&":
                return ("sections", line[1:].lower())
            elif "ATOMIC_SPECIES" in line:
                return ("pseudo", )
            elif "K_POINTS" in line:
                # Text between "{" and the final character of the line.
                return ("kpoints", line.split("{")[1][:-1])
            elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
                return ("structure", line.split("{")[1][:-1])
            elif line == "/":
                # "/" resets the mode to None.
                return None
            else:
                return mode

        sections = {
            "control": {},
            "system": {},
            "electrons": {},
            "ions": {},
            "cell": {}
        }
        pseudo = {}
        pseudo_index = 0
        lattice = []
        species = []
        coords = []
        structure = None
        site_properties = {"pseudo": []}
        mode = None
        # NOTE(review): kpoints_mode, kpoints_grid, kpoints_shift and
        # coords_are_cartesian are only assigned inside the branches below;
        # if the input never reaches those branches they are unbound when
        # used after the loop. Confirm whether defaults are wanted.
        for line in lines:
            mode = input_mode(line)
            if mode is None:
                pass
            elif mode[0] == "sections":
                section = mode[1]
                # Matches "key = val" and indexed "key(n) = val".
                m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
                if m:
                    key = m.group(1).strip()
                    key_ = m.group(2).strip()
                    val = m.group(3).strip()
                    if key_ != "":
                        # Indexed keys are stored as a fixed-length list,
                        # using the 1-based index from the input.
                        if sections[section].get(key, None) is None:
                            val_ = [0.0] * 20  # MAX NTYP DEFINITION
                            val_[int(key_) - 1] = PWInput.proc_val(key, val)
                            sections[section][key] = val_

                            # Indexed keys also become per-site properties,
                            # filled in while reading atomic positions.
                            site_properties[key] = []
                        else:
                            sections[section][key][int(key_) -
                                                   1] = PWInput.proc_val(
                                                       key, val)
                    else:
                        sections[section][key] = PWInput.proc_val(key, val)

            elif mode[0] == "pseudo":
                # NOTE(review): the "." in the second group is unescaped and
                # therefore matches any character - confirm this is intended.
                m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
                if m:
                    pseudo[m.group(1).strip()] = {}
                    pseudo[m.group(1).strip()]["index"] = pseudo_index
                    pseudo[m.group(1).strip()]["pseudopot"] = m.group(
                        3).strip()
                    pseudo_index += 1
            elif mode[0] == "kpoints":
                # Six integers: three grid divisions followed by three shifts.
                m = re.match(r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)',
                             line)
                if m:
                    kpoints_grid = (int(m.group(1)), int(m.group(2)),
                                    int(m.group(3)))
                    kpoints_shift = (int(m.group(4)), int(m.group(5)),
                                     int(m.group(6)))
                else:
                    # Any non-matching line in this mode (including the
                    # K_POINTS card line itself) records the mode text.
                    kpoints_mode = mode[1]
            elif mode[0] == "structure":
                # m_l: three numbers (a lattice vector row).
                # m_p: a label followed by three numbers (an atomic position).
                m_l = re.match(
                    r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
                m_p = re.match(
                    r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)',
                    line)
                if m_l:
                    lattice += [
                        float(m_l.group(1)),
                        float(m_l.group(2)),
                        float(m_l.group(3))
                    ]
                elif m_p:
                    site_properties["pseudo"].append(
                        pseudo[m_p.group(1)]["pseudopot"])
                    # Species name is the part of the pseudopotential file
                    # name before the first ".".
                    species += [
                        pseudo[m_p.group(1)]["pseudopot"].split(".")[0]
                    ]
                    coords += [[
                        float(m_p.group(2)),
                        float(m_p.group(3)),
                        float(m_p.group(4))
                    ]]

                    # Copy the per-species value of every indexed "system"
                    # key onto this site.
                    for k, v in site_properties.items():
                        if k != "pseudo":
                            site_properties[k].append(sections['system'][k][
                                pseudo[m_p.group(1)]["index"]])
                if mode[1] == "angstrom":
                    coords_are_cartesian = True
                elif mode[1] == "crystal":
                    coords_are_cartesian = False

        structure = Structure(Lattice(lattice),
                              species,
                              coords,
                              coords_are_cartesian=coords_are_cartesian,
                              site_properties=site_properties)
        return PWInput(structure=structure,
                       control=sections["control"],
                       system=sections["system"],
                       electrons=sections["electrons"],
                       ions=sections["ions"],
                       cell=sections["cell"],
                       kpoints_mode=kpoints_mode,
                       kpoints_grid=kpoints_grid,
                       kpoints_shift=kpoints_shift)

Example #12

0

Show file

File: data.py Project: setten/pymatgen

    def from_file(cls, filename, atom_style="full", sort_id=False):
        """
        Constructor from parsing a file.

        The file is split into a header (everything before the first
        section keyword) and one part per section keyword. The header
        yields counts, type counts, box bounds and tilt; each section is
        parsed line by line into a list of dictionaries.

        Args:
            filename (str): Filename to read.
            atom_style (str): Associated atom_style. Default to "full".
            sort_id (bool): Whether sort each section by id. Default to
                False.

        Returns:
            Instance of ``cls`` built from the parsed items.

        Raises:
            RuntimeError: If a Velocities or topology section appears
                before the Atoms section.
            AssertionError: If the number of parsed entries disagrees with
                the counts given in the header.
        """
        with open(filename) as f:
            lines = f.readlines()
        clines = list(clean_lines(lines))
        # Indices of the lines that are exactly one of the known section
        # keywords; these are the split points between parts.
        section_marks = [i for i, l in enumerate(clines) if l
                         in itertools.chain(*SECTION_KEYWORDS.values())]
        parts = np.split(clines, section_marks)

        # First, parse header
        float_group = r'([0-9eE.+-]+)'
        header_pattern = {}
        header_pattern["counts"] = r'^\s*(\d+)\s+([a-zA-Z]+)$'
        header_pattern["types"] = r'^\s*(\d+)\s+([a-zA-Z]+)\s+types$'
        # Two floats followed by e.g. "xlo xhi"; the backreference \3 forces
        # the same axis letter on both tokens.
        header_pattern["bounds"] = r'^\s*{}$'.format(r'\s+'.join(
            [float_group] * 2 + [r"([xyz])lo \3hi"]))
        header_pattern["tilt"] = r'^\s*{}$'.format(r'\s+'.join(
            [float_group] * 3 + ["xy xz yz"]))

        header = {"counts": {}, "types": {}}
        bounds = {}
        for l in parts[0]:
            match = None
            # Stop at the first pattern that matches; ``k`` then names it.
            for k, v in header_pattern.items():
                match = re.match(v, l)
                if match:
                    break
                else:
                    continue
            if match and k in ["counts", "types"]:
                header[k][match.group(2)] = int(match.group(1))
            elif match and k == "bounds":
                g = match.groups()
                bounds[g[2]] = [float(i) for i in g[:2]]
            elif match and k == "tilt":
                header["tilt"] = [float(i) for i in match.groups()]
        # Axes without an explicit bounds line fall back to [-0.5, 0.5].
        header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]

        # Then, parse each section
        topo_sections = SECTION_KEYWORDS["molecule"]

        def parse_section(single_section_lines):
            # Parse one section (keyword line followed by data lines) and
            # return ``(keyword, list of per-line dicts)``. Unsupported
            # keywords yield an empty list and a warning.
            kw = single_section_lines[0]

            # Pick the per-line parser for this keyword; every parser takes
            # the whitespace-split tokens of one data line.
            if kw in SECTION_KEYWORDS["ff"] and kw != "PairIJ Coeffs":
                parse_line = lambda l: {"coeffs": [literal_eval(x)
                                                   for x in l[1:]]}
            elif kw == "PairIJ Coeffs":
                parse_line = lambda l: {"id1": int(l[0]), "id2": int(l[1]),
                                        "coeffs": [literal_eval(x)
                                                   for x in l[2:]]}
            elif kw in topo_sections:
                # Number of atom ids per entry for each topology keyword.
                n = {"Bonds": 2, "Angles": 3, "Dihedrals": 4, "Impropers": 4}
                parse_line = lambda l: {"type": int(l[1]), kw[:-1].lower():
                    [int(x) for x in l[2:n[kw] + 2]]}
            elif kw == "Atoms":
                keys = ATOMS_LINE_FORMAT[atom_style][:]
                # Use the first data line to detect whether three extra
                # columns (stored as nx, ny, nz) are present.
                sample_l = single_section_lines[1].split()
                if len(sample_l) == len(keys) + 1:
                    pass
                elif len(sample_l) == len(keys) + 4:
                    keys += ["nx", "ny", "nz"]
                else:
                    warnings.warn("Atoms section format might be imcompatible"
                                  " with atom_style %s." % atom_style)
                float_keys = [k for k in keys if k in ATOMS_FLOATS]
                parse_line = lambda l: {k: float(v) if k in float_keys
                else int(v) for (k, v) in zip(keys, l[1:len(keys) + 1])}
            elif kw == "Velocities":
                parse_line = lambda l: {"velocity": [float(x)
                                                     for x in l[1:4]]}
            elif kw == "Masses":
                parse_line = lambda l: {"mass": float(l[1])}
            else:
                warnings.warn("%s section parser has not been implemented. "
                              "Skipping..." % kw)
                return kw, []

            section = []
            splitted_lines = [l.split() for l in single_section_lines[1:]]
            if sort_id and kw != "PairIJ Coeffs":
                splitted_lines = sorted(splitted_lines,
                                        key=lambda l: int(l[0]))
            for l in splitted_lines:
                line_data = parse_line(l)
                # Every section except "PairIJ Coeffs" uses the first token
                # as the entry id.
                if kw != "PairIJ Coeffs":
                    line_data["id"] = int(l[0])
                section.append(line_data)
            return kw, section

        err_msg = "Bad LAMMPS data format where "
        body = {}
        seen_atoms = False
        for part in parts[1:]:
            name, section = parse_section(part)
            if name == "Atoms":
                seen_atoms = True
            # Velocities and topology sections must come after Atoms.
            if name in ["Velocities"] + topo_sections and not seen_atoms:
                raise RuntimeError(err_msg + "%s section appears before"
                                             " Atoms section" % name)
            body.update({name: section})

        # Cross-check parsed section sizes against the header counts.
        # NOTE(review): these checks use ``assert`` and so are skipped when
        # Python runs with -O.
        err_msg += "Nos. of {} do not match between header and {} section"
        assert len(body["Masses"]) == header["types"]["atom"], \
            err_msg.format("atom types", "Masses")
        atom_sections = ["Atoms", "Velocities"] \
            if body.get("Velocities") else ["Atoms"]
        for s in atom_sections:
            assert len(body[s]) == header["counts"]["atoms"], \
                err_msg.format("atoms", s)
        for s in topo_sections:
            if header["counts"].get(s.lower(), 0) > 0:
                assert len(body[s]) == header["counts"][s.lower()], \
                    err_msg.format(s.lower(), s)

        # Assemble the keyword arguments for the constructor.
        items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
        items["box_bounds"] = header["bounds"]
        items["box_tilt"] = header.get("tilt")
        items["velocities"] = body.get("Velocities")
        ff_kws = [k for k in body.keys() if k in SECTION_KEYWORDS["ff"]]
        items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
            else None
        topo_kws = [k for k in body.keys()
                    if k in SECTION_KEYWORDS["molecule"]]
        items["topology"] = {k: body[k] for k in topo_kws} \
            if topo_kws else None
        items["atom_style"] = atom_style
        return cls(**items)

Example #13

0

Show file

File: get_Symmetry.py Project: shhebing/cavd

    def from_string(data, default_names=None, read_velocities=True):
        """
        Reads a Poscar from a string.

        The code will try its best to determine the elements in the POSCAR in
        the following order:
        1. If default_names are supplied and valid, it will use those. Usually,
        default names comes from an external source, such as a POTCAR in the
        same directory.
        2. If there are no valid default names but the input file is Vasp5-like
        and contains element symbols in the 6th line, the code will use that.
        3. Failing (2), the code will check if a symbol is provided at the end
        of each coordinate.

        If all else fails, the code will just assign the first n elements in
        increasing atomic number, where n is the number of species, to the
        Poscar. For example, H, He, Li, ....  This will ensure at least a
        unique element is assigned to each site and any analysis that does not
        require specific elemental properties should work fine.

        Args:
            data (str): String containing Poscar data.
            default_names ([str]): Default symbols for the POSCAR file,
                usually coming from a POTCAR in the same directory.
            read_velocities (bool): Whether to read or not velocities if they
                are present in the POSCAR. Default is True.

        Returns:
            Poscar object.
        """
        # "^\s*$" doesn't match lines with no whitespace
        chunks = re.split(r"\n\s*\n", data.rstrip(), flags=re.MULTILINE)
        try:
            if chunks[0] == "":
                chunks.pop(0)
                chunks[0] = "\n" + chunks[0]
        except IndexError:
            raise ValueError("Empty POSCAR")

        # Parse positions
        lines = tuple(clean_lines(chunks[0].split("\n"), False))
        comment = lines[0]
        scale = float(lines[1])
        lattice = np.array([[float(i) for i in line.split()]
                            for line in lines[2:5]])
        if scale < 0:
            # In vasp, a negative scale factor is treated as a volume. We need
            # to translate this to a proper lattice vector scaling.
            vol = abs(det(lattice))
            lattice *= (-scale / vol)**(1 / 3)
        else:
            lattice *= scale

        vasp5_symbols = False
        try:
            natoms = [int(i) for i in lines[5].split()]
            ipos = 6
        except ValueError:
            vasp5_symbols = True
            symbols = lines[5].split()
            """
            Atoms and number of atoms in POSCAR written with vasp appear on 
            multiple lines when atoms of the same type are not grouped together 
            and more than 20 groups are then defined ...
            
            Example :
            
            Cr16 Fe35 Ni2
               1.00000000000000
                 8.5415010000000002   -0.0077670000000000   -0.0007960000000000
                -0.0077730000000000    8.5224019999999996    0.0105580000000000
                -0.0007970000000000    0.0105720000000000    8.5356889999999996
               Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Ni   Fe   Cr   Fe   Cr
               Fe   Ni   Fe   Cr   Fe
                 1     1     2     4     2     1     1     1     2     1     1     1     4     1     1     1     5     3     6     1
                 2     1     3     2     5
            Direct
              ...
            """
            nlines_symbols = 1
            for nlines_symbols in range(1, 11):
                try:
                    int(lines[5 + nlines_symbols].split()[0])
                    break
                except ValueError:
                    pass
            for iline_symbols in range(6, 5 + nlines_symbols):
                symbols.extend(lines[iline_symbols].split())
            natoms = []
            iline_natoms_start = 5 + nlines_symbols
            for iline_natoms in range(iline_natoms_start,
                                      iline_natoms_start + nlines_symbols):
                natoms.extend([int(i) for i in lines[iline_natoms].split()])
            atomic_symbols = list()
            for i in range(len(natoms)):
                atomic_symbols.extend([symbols[i]] * natoms[i])
            ipos = 5 + 2 * nlines_symbols

        postype = lines[ipos].split()[0]

        sdynamics = False
        # Selective dynamics
        if postype[0] in "sS":
            sdynamics = True
            ipos += 1
            postype = lines[ipos].split()[0]

        cart = postype[0] in "cCkK"
        nsites = sum(natoms)

        # If default_names is specified (usually coming from a POTCAR), use
        # them. This is in line with Vasp"s parsing order that the POTCAR
        # specified is the default used.
        if default_names:
            try:
                atomic_symbols = []
                for i in range(len(natoms)):
                    atomic_symbols.extend([default_names[i]] * natoms[i])
                vasp5_symbols = True
            except IndexError:
                pass

        if not vasp5_symbols:
            ind = 3 if not sdynamics else 6
            try:
                # Check if names are appended at the end of the coordinates.
                atomic_symbols = [
                    l.split()[ind] for l in lines[ipos + 1:ipos + 1 + nsites]
                ]
                # Ensure symbols are valid elements
                if not all(
                    [Element.is_valid_symbol(sym) for sym in atomic_symbols]):
                    raise ValueError("Non-valid symbols detected.")
                vasp5_symbols = True
            except (ValueError, IndexError):
                # Defaulting to false names.
                atomic_symbols = []
                for i in range(len(natoms)):
                    sym = Element.from_Z(i + 1).symbol
                    atomic_symbols.extend([sym] * natoms[i])
                warnings.warn("Elements in POSCAR cannot be determined. "
                              "Defaulting to false names %s." %
                              " ".join(atomic_symbols))

        # read the atomic coordinates
        coords = []
        selective_dynamics = list() if sdynamics else None
        for i in range(nsites):
            toks = lines[ipos + 1 + i].split()
            crd_scale = scale if cart else 1
            coords.append([float(j) * crd_scale for j in toks[:3]])
            if sdynamics:
                selective_dynamics.append(
                    [tok.upper()[0] == "T" for tok in toks[3:6]])

        if read_velocities:
            # Parse velocities if any
            velocities = []
            if len(chunks) > 1:
                for line in chunks[1].strip().split("\n"):
                    velocities.append([float(tok) for tok in line.split()])

            # Parse the predictor-corrector data
            predictor_corrector = []
            predictor_corrector_preamble = None

            if len(chunks) > 2:
                lines = chunks[2].strip().split("\n")
                # There are 3 sets of 3xN Predictor corrector parameters
                # So can't be stored as a single set of "site_property"

                # First line in chunk is a key in CONTCAR
                # Second line is POTIM
                # Third line is the thermostat parameters
                predictor_corrector_preamble = (lines[0] + "\n" + lines[1] +
                                                "\n" + lines[2])
                # Rest is three sets of parameters, each set contains
                # x, y, z predictor-corrector parameters for every atom in order
                lines = lines[3:]
                for st in range(nsites):
                    d1 = [float(tok) for tok in lines[st].split()]
                    d2 = [float(tok) for tok in lines[st + nsites].split()]
                    d3 = [float(tok) for tok in lines[st + 2 * nsites].split()]
                    predictor_corrector.append([d1, d2, d3])
        else:
            velocities = None
            predictor_corrector = None
            predictor_corrector_preamble = None

        return Poscar_new(
            atomic_symbols,
            coords,
            lattice,
            comment,
            selective_dynamics,
            vasp5_symbols,
            velocities=velocities,
            predictor_corrector=predictor_corrector,
            predictor_corrector_preamble=predictor_corrector_preamble)

Example #14

0

Show file

File: data.py Project: OlgaGKononova/pymatgen

    def from_file(cls, filename, atom_style="full", sort_id=False):
        """
        Constructor that parses a file.

        The file is split at every line matching a keyword in
        SECTION_KEYWORDS: the lines before the first match are parsed as
        the header (counts, types, box bounds, tilt) and each following
        part is read into a pandas DataFrame.

        Args:
            filename (str): Filename to read.
            atom_style (str): Associated atom_style. Default to "full".
            sort_id (bool): Whether sort each section by id. Default to
                False.

        Returns:
            An instance of cls built from the parsed masses, atoms, box
            bounds, box tilt, velocities, force field, topology and
            atom_style.

        Raises:
            ValueError: If the column count of the Atoms section fits
                neither atom_style nor atom_style plus the three extra
                nx, ny, nz columns.
            NotImplementedError: If a section keyword has no parser.
            RuntimeError: If Velocities or a topology section appears
                before the Atoms section.
            AssertionError: If a section length disagrees with the header.

        """
        with open(filename) as f:
            lines = f.readlines()
        kw_pattern = r"|".join(itertools.chain(*SECTION_KEYWORDS.values()))
        section_marks = [i for i, line in enumerate(lines)
                         if re.search(kw_pattern, line)]
        parts = np.split(lines, section_marks)

        float_group = r"([0-9eE.+-]+)"
        header_pattern = dict()
        header_pattern["counts"] = r"^\s*(\d+)\s+([a-zA-Z]+)$"
        header_pattern["types"] = r"^\s*(\d+)\s+([a-zA-Z]+)\s+types$"
        header_pattern["bounds"] = r"^\s*{}$".format(r"\s+".join(
            [float_group] * 2 + [r"([xyz])lo \3hi"]))
        header_pattern["tilt"] = r"^\s*{}$".format(r"\s+".join(
            [float_group] * 3 + ["xy xz yz"]))

        header = {"counts": {}, "types": {}}
        bounds = {}
        for line in clean_lines(parts[0][1:]):  # skip the 1st line
            for kind, pattern in header_pattern.items():
                match = re.match(pattern, line)
                if match:
                    break
            else:
                # No header pattern matched this line; nothing to record.
                continue
            if kind in ("counts", "types"):
                header[kind][match.group(2)] = int(match.group(1))
            elif kind == "bounds":
                g = match.groups()
                bounds[g[2]] = [float(i) for i in g[:2]]
            elif kind == "tilt":
                header["tilt"] = [float(i) for i in match.groups()]
        # Axes without an explicit "lo hi" line fall back to [-0.5, 0.5].
        header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]

        def parse_section(sec_lines):
            # The keyword is the title line with any trailing "#" comment
            # removed.
            title_info = sec_lines[0].split("#", 1)
            kw = title_info[0].strip()
            sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
            # sep=r"\s+" is the documented replacement for the deprecated
            # delim_whitespace=True.
            df = pd.read_csv(sio, header=None, comment="#", sep=r"\s+")
            if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
                names = ["id"] + ["coeff%d" % i
                                  for i in range(1, df.shape[1])]
            elif kw == "PairIJ Coeffs":
                names = ["id1", "id2"] + ["coeff%d" % i
                                          for i in range(1, df.shape[1] - 1)]
                df.index.name = None
            elif kw in SECTION_HEADERS:
                names = ["id"] + SECTION_HEADERS[kw]
            elif kw == "Atoms":
                names = ["id"] + ATOMS_HEADERS[atom_style]
                if df.shape[1] == len(names):
                    pass
                elif df.shape[1] == len(names) + 3:
                    names += ["nx", "ny", "nz"]
                else:
                    raise ValueError("Format in Atoms section inconsistent"
                                     " with atom_style %s" % atom_style)
            else:
                raise NotImplementedError("Parser for %s section"
                                          " not implemented" % kw)
            df.columns = names
            if sort_id:
                sort_by = "id" if kw != "PairIJ Coeffs" else ["id1", "id2"]
                df.sort_values(sort_by, inplace=True)
            if "id" in df.columns:
                df.set_index("id", drop=True, inplace=True)
                df.index.name = None
            return kw, df

        err_msg = "Bad LAMMPS data format where "
        body = {}
        seen_atoms = False
        for part in parts[1:]:
            name, section = parse_section(part)
            if name == "Atoms":
                seen_atoms = True
            if name in ["Velocities"] + SECTION_KEYWORDS["topology"] and \
                    not seen_atoms:  # Atoms must appear earlier than these
                raise RuntimeError(err_msg + "%s section appears before"
                                             " Atoms section" % name)
            body[name] = section

        # NOTE: the consistency checks below are asserts, so they are
        # skipped when Python runs with -O.
        err_msg += "Nos. of {} do not match between header and {} section"
        assert len(body["Masses"]) == header["types"]["atom"], \
            err_msg.format("atom types", "Masses")
        atom_sections = ["Atoms", "Velocities"] \
            if "Velocities" in body else ["Atoms"]
        for s in atom_sections:
            assert len(body[s]) == header["counts"]["atoms"], \
                err_msg.format("atoms", s)
        for s in SECTION_KEYWORDS["topology"]:
            if header["counts"].get(s.lower(), 0) > 0:
                assert len(body[s]) == header["counts"][s.lower()], \
                    err_msg.format(s.lower(), s)

        items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
        items["box_bounds"] = header["bounds"]
        items["box_tilt"] = header.get("tilt")
        items["velocities"] = body.get("Velocities")
        ff_kws = [k for k in body if k
                  in SECTION_KEYWORDS["ff"] + SECTION_KEYWORDS["class2"]]
        items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
            else None
        topo_kws = [k for k in body if k in SECTION_KEYWORDS["topology"]]
        items["topology"] = {k: body[k] for k in topo_kws} \
            if topo_kws else None
        items["atom_style"] = atom_style
        return cls(**items)

Example #15

0

Show file

def struct_import_pymatgen(filename, ubin=""):
    """Import a structure from a QE input/output file into pymatgen.

    Adapted from the from_file and from_string methods of
    pymatgen.io.pwscf.

    NOTE(review): the original carried a "DOES NOT WORK ATM" warning. One
    definite cause was that the ATOMIC_SPECIES parsing was commented out,
    so the pseudopotential lookup used for every atomic position raised
    KeyError; that parsing is restored here. The function has not been
    verified end to end -- confirm on real files before relying on it.

    Args:
        filename (str): input/output QE file containing the structure.
        ubin (str): dummy variable; kept for consistency with the
            remaining scripts and not used.

    Returns:
        Structure built from the CELL_PARAMETERS and ATOMIC_POSITIONS
        cards, with a "pseudo" site property and one site property per
        indexed namelist key.

    Raises:
        ValueError: If no ATOMIC_POSITIONS card with an {angstrom} or
            {crystal} option was found.
    """
    import re
    from monty.io import zopen
    from pymatgen.util.io_utils import clean_lines
    from pymatgen.io.pwscf import PWInput

    print("# Reading atomic coordinates from: ", filename)
    with zopen(filename, "rt") as f:
        string = f.read()

    lines = list(clean_lines(string.splitlines()))

    def input_mode(line):
        # Classify a line. Lines that open no new block inherit the mode
        # of the previous line (read from the enclosing scope).
        if line[0] == "&":
            return ("sections", line[1:].lower())
        elif "ATOMIC_SPECIES" in line:
            return ("pseudo", )
        elif "K_POINTS" in line:
            return ("kpoints", line.split("{")[1][:-1])
        elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
            return ("structure", line.split("{")[1][:-1])
        elif line == "/":
            return None
        else:
            return mode

    sections = {"control": {}, "system": {}, "electrons": {},
                "ions": {}, "cell": {}}
    pseudo = {}
    pseudo_index = 0
    lattice = []
    species = []
    coords = []
    site_properties = {"pseudo": []}
    coords_are_cartesian = None  # unknown until ATOMIC_POSITIONS is seen
    mode = None
    for line in lines:
        mode = input_mode(line)
        if mode is None:
            continue

        if mode[0] == "sections":
            section = mode[1]
            m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
            if m:
                key = m.group(1).strip()
                key_ = m.group(2).strip()
                val = m.group(3).strip()
                if key_ != "":
                    # Indexed key such as name(2): store the values in a
                    # fixed-size list addressed by the 1-based index.
                    value = PWInput.proc_val(key, val)
                    if sections[section].get(key) is None:
                        sections[section][key] = [0.0] * 20  # MAX NTYP DEFINITION
                        site_properties[key] = []
                    sections[section][key][int(key_) - 1] = value
                else:
                    sections[section][key] = PWInput.proc_val(key, val)

        elif mode[0] == "pseudo":
            # Label, mass and pseudopotential file; the position parsing
            # below looks species up in this table.
            m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
            if m:
                label = m.group(1).strip()
                pseudo[label] = {"index": pseudo_index,
                                 "pseudopot": m.group(3).strip()}
                pseudo_index += 1

        elif mode[0] == "structure":
            if "ATOMIC_POSITIONS" in line:
                # Only the ATOMIC_POSITIONS option describes the
                # coordinates; the CELL_PARAMETERS option must not
                # overwrite it.
                if mode[1] == "angstrom":
                    coords_are_cartesian = True
                elif mode[1] == "crystal":
                    coords_are_cartesian = False
            m_l = re.match(r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            m_p = re.match(r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            if m_l:
                lattice += [float(m_l.group(1)), float(m_l.group(2)),
                            float(m_l.group(3))]
            elif m_p:
                entry = pseudo[m_p.group(1)]
                site_properties["pseudo"].append(entry["pseudopot"])
                species.append(entry["pseudopot"].split(".")[0])
                coords.append([float(m_p.group(2)), float(m_p.group(3)),
                               float(m_p.group(4))])
                for k in site_properties:
                    if k != "pseudo":
                        site_properties[k].append(
                            sections['system'][k][entry["index"]])

    if coords_are_cartesian is None:
        raise ValueError("Could not determine the coordinate type: no "
                         "ATOMIC_POSITIONS card with an {angstrom} or "
                         "{crystal} option was found.")

    structure = Structure(Lattice(lattice), species, coords,
                          coords_are_cartesian=coords_are_cartesian,
                          site_properties=site_properties)
    return structure

Example #16

0

Show file

File: data.py Project: fraricci/pymatgen

    def from_file(cls, filename, atom_style="full", sort_id=False):
        """
        Constructor that parses a file.

        The file is split at every line matching a keyword in
        SECTION_KEYWORDS: the lines before the first match are parsed as
        the header (counts, types, box bounds, tilt) and each following
        part is read into a pandas DataFrame.

        Args:
            filename (str): Filename to read.
            atom_style (str): Associated atom_style. Default to "full".
            sort_id (bool): Whether sort each section by id. Default to
                False.

        Returns:
            An instance of cls built from the parsed masses, atoms,
            velocities, force field, topology, atom_style and a LammpsBox
            made from the header bounds and tilt.

        Raises:
            ValueError: If the column count of the Atoms section fits
                neither atom_style nor atom_style plus the three extra
                nx, ny, nz columns.
            NotImplementedError: If a section keyword has no parser.
            RuntimeError: If Velocities or a topology section appears
                before the Atoms section.
            AssertionError: If a section length disagrees with the header.

        """
        with open(filename) as f:
            lines = f.readlines()
        kw_pattern = r"|".join(itertools.chain(*SECTION_KEYWORDS.values()))
        section_marks = [i for i, line in enumerate(lines)
                         if re.search(kw_pattern, line)]
        parts = np.split(lines, section_marks)

        float_group = r"([0-9eE.+-]+)"
        header_pattern = dict()
        header_pattern["counts"] = r"^\s*(\d+)\s+([a-zA-Z]+)$"
        header_pattern["types"] = r"^\s*(\d+)\s+([a-zA-Z]+)\s+types$"
        header_pattern["bounds"] = r"^\s*{}$".format(r"\s+".join(
            [float_group] * 2 + [r"([xyz])lo \3hi"]))
        header_pattern["tilt"] = r"^\s*{}$".format(r"\s+".join(
            [float_group] * 3 + ["xy xz yz"]))

        header = {"counts": {}, "types": {}}
        bounds = {}
        for line in clean_lines(parts[0][1:]):  # skip the 1st line
            for kind, pattern in header_pattern.items():
                match = re.match(pattern, line)
                if match:
                    break
            else:
                # No header pattern matched this line; nothing to record.
                continue
            if kind in ("counts", "types"):
                header[kind][match.group(2)] = int(match.group(1))
            elif kind == "bounds":
                g = match.groups()
                bounds[g[2]] = [float(i) for i in g[:2]]
            elif kind == "tilt":
                header["tilt"] = [float(i) for i in match.groups()]
        # Axes without an explicit "lo hi" line fall back to [-0.5, 0.5].
        header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]
        box = LammpsBox(header["bounds"], header.get("tilt"))

        def parse_section(sec_lines):
            # The keyword is the title line with any trailing "#" comment
            # removed.
            title_info = sec_lines[0].split("#", 1)
            kw = title_info[0].strip()
            sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
            # sep=r"\s+" is the documented replacement for the deprecated
            # delim_whitespace=True.
            df = pd.read_csv(sio, header=None, comment="#", sep=r"\s+")
            if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
                names = ["id"] + ["coeff%d" % i
                                  for i in range(1, df.shape[1])]
            elif kw == "PairIJ Coeffs":
                names = ["id1", "id2"] + ["coeff%d" % i
                                          for i in range(1, df.shape[1] - 1)]
                df.index.name = None
            elif kw in SECTION_HEADERS:
                names = ["id"] + SECTION_HEADERS[kw]
            elif kw == "Atoms":
                names = ["id"] + ATOMS_HEADERS[atom_style]
                if df.shape[1] == len(names):
                    pass
                elif df.shape[1] == len(names) + 3:
                    names += ["nx", "ny", "nz"]
                else:
                    raise ValueError("Format in Atoms section inconsistent"
                                     " with atom_style %s" % atom_style)
            else:
                raise NotImplementedError("Parser for %s section"
                                          " not implemented" % kw)
            df.columns = names
            if sort_id:
                sort_by = "id" if kw != "PairIJ Coeffs" else ["id1", "id2"]
                df.sort_values(sort_by, inplace=True)
            if "id" in df.columns:
                df.set_index("id", drop=True, inplace=True)
                df.index.name = None
            return kw, df

        err_msg = "Bad LAMMPS data format where "
        body = {}
        seen_atoms = False
        for part in parts[1:]:
            name, section = parse_section(part)
            if name == "Atoms":
                seen_atoms = True
            if name in ["Velocities"] + SECTION_KEYWORDS["topology"] and \
                    not seen_atoms:  # Atoms must appear earlier than these
                raise RuntimeError(err_msg + "%s section appears before"
                                             " Atoms section" % name)
            body[name] = section

        # NOTE: the consistency checks below are asserts, so they are
        # skipped when Python runs with -O.
        err_msg += "Nos. of {} do not match between header and {} section"
        assert len(body["Masses"]) == header["types"]["atom"], \
            err_msg.format("atom types", "Masses")
        atom_sections = ["Atoms", "Velocities"] \
            if "Velocities" in body else ["Atoms"]
        for s in atom_sections:
            assert len(body[s]) == header["counts"]["atoms"], \
                err_msg.format("atoms", s)
        for s in SECTION_KEYWORDS["topology"]:
            if header["counts"].get(s.lower(), 0) > 0:
                assert len(body[s]) == header["counts"][s.lower()], \
                    err_msg.format(s.lower(), s)

        items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
        items["velocities"] = body.get("Velocities")
        ff_kws = [k for k in body if k
                  in SECTION_KEYWORDS["ff"] + SECTION_KEYWORDS["class2"]]
        items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
            else None
        topo_kws = [k for k in body if k in SECTION_KEYWORDS["topology"]]
        items["topology"] = {k: body[k] for k in topo_kws} \
            if topo_kws else None
        items["atom_style"] = atom_style
        items["box"] = box
        return cls(**items)

Example #17

0

Show file

    def from_string(header_str):
        """
        Reads Header string and returns Header object if header was
        generated by pymatgen.

        Args:
            header_str: pymatgen generated feff.inp header

        Returns:
            Header object when the first line mentions "pymatgen";
            otherwise a message string saying that no header object can
            be returned.
        """
        # Checks to see if generated by pymatgen, if not it is impossible to
        # generate structure object so it is not possible to generate header
        # object and routine ends

        # NOTE(review): the False flag presumably keeps empty lines so the
        # fixed line offsets used below stay valid -- confirm against
        # clean_lines.
        lines = tuple(clean_lines(header_str.split("\n"), False))
        comment1 = lines[0]

        # Membership test rather than find() > 0, which wrongly rejected a
        # line that starts with "pymatgen" (index 0).
        if "pymatgen" not in comment1:
            return "Header not generated by pymatgen, " \
                   "cannot return header object"

        comment2 = ' '.join(lines[1].split()[2:])

        # This section gets information to create structure object

        source = ' '.join(lines[2].split()[2:])
        natoms = int(lines[8].split()[2])

        # Lattice lengths and angles are tokens 2-4 of lines 6 and 7.
        basis_vec = lines[6].split()
        lengths = [float(tok) for tok in basis_vec[2:5]]
        basis_ang = lines[7].split()
        angles = [float(tok) for tok in basis_ang[2:5]]

        lattice = Lattice.from_lengths_and_angles(lengths, angles)

        # One line per atom from line 9 on: token 2 is the symbol and the
        # tokens after it are the coordinates.
        atomic_symbols = []
        coords = []
        for i in range(9, 9 + natoms):
            toks = lines[i].split()
            atomic_symbols.append(toks[2])
            coords.append([float(s) for s in toks[3:]])

        # Structure object is now generated and Header object returned

        struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                    False, False)

        return Header(struct_fromfile, source, comment2)

Example #18

0

Show file

File: data.py Project: setten/pymatgen

    def from_file(cls, filename, atom_style="full", sort_id=False):
        """
        Constructor from parsing a file.

        The cleaned lines are split at every line equal to a keyword in
        SECTION_KEYWORDS; the first part is the header and every other
        part is turned into a list of per-line dicts.

        Args:
            filename (str): Filename to read.
            atom_style (str): Associated atom_style. Default to "full".
            sort_id (bool): Whether sort each section by id. Default to
                False.

        """
        with open(filename) as f:
            raw_lines = f.readlines()
        clines = list(clean_lines(raw_lines))
        all_keywords = list(itertools.chain(*SECTION_KEYWORDS.values()))
        section_marks = [idx for idx, text in enumerate(clines)
                         if text in all_keywords]
        parts = np.split(clines, section_marks)

        # First, parse header
        float_group = r'([0-9eE.+-]+)'
        header_pattern = {
            "counts": r'^\s*(\d+)\s+([a-zA-Z]+)$',
            "types": r'^\s*(\d+)\s+([a-zA-Z]+)\s+types$',
            "bounds": r'^\s*{}$'.format(
                r'\s+'.join([float_group] * 2 + [r"([xyz])lo \3hi"])),
            "tilt": r'^\s*{}$'.format(
                r'\s+'.join([float_group] * 3 + ["xy xz yz"])),
        }

        header = {"counts": {}, "types": {}}
        bounds = {}
        for text in parts[0]:
            for kind, pattern in header_pattern.items():
                match = re.match(pattern, text)
                if match:
                    break
            else:
                # Line matches no header pattern; ignore it.
                continue
            if kind in ["counts", "types"]:
                header[kind][match.group(2)] = int(match.group(1))
            elif kind == "bounds":
                groups = match.groups()
                bounds[groups[2]] = [float(x) for x in groups[:2]]
            elif kind == "tilt":
                header["tilt"] = [float(x) for x in match.groups()]
        header["bounds"] = [bounds.get(axis, [-0.5, 0.5]) for axis in "xyz"]

        # Then, parse each section
        topo_sections = SECTION_KEYWORDS["molecule"]

        def parse_section(sec_lines):
            # First line is the section keyword; it selects how the
            # whitespace-split tokens of each data line are converted.
            kw = sec_lines[0]

            if kw in SECTION_KEYWORDS["ff"] and kw != "PairIJ Coeffs":
                def parse_line(toks):
                    return {"coeffs": [literal_eval(x) for x in toks[1:]]}
            elif kw == "PairIJ Coeffs":
                def parse_line(toks):
                    return {
                        "id1": int(toks[0]),
                        "id2": int(toks[1]),
                        "coeffs": [literal_eval(x) for x in toks[2:]]
                    }
            elif kw in topo_sections:
                n = {"Bonds": 2, "Angles": 3, "Dihedrals": 4, "Impropers": 4}

                def parse_line(toks):
                    return {
                        "type": int(toks[1]),
                        kw[:-1].lower(): [int(x)
                                          for x in toks[2:n[kw] + 2]]
                    }
            elif kw == "Atoms":
                keys = ATOMS_LINE_FORMAT[atom_style][:]
                # The first data line decides whether the three extra
                # nx, ny, nz columns are present.
                n_extra = len(sec_lines[1].split()) - len(keys)
                if n_extra == 4:
                    keys += ["nx", "ny", "nz"]
                elif n_extra != 1:
                    warnings.warn("Atoms section format might be imcompatible"
                                  " with atom_style %s." % atom_style)
                float_keys = [k for k in keys if k in ATOMS_FLOATS]

                def parse_line(toks):
                    return {
                        k: float(v) if k in float_keys else int(v)
                        for (k, v) in zip(keys, toks[1:len(keys) + 1])
                    }
            elif kw == "Velocities":
                def parse_line(toks):
                    return {"velocity": [float(x) for x in toks[1:4]]}
            elif kw == "Masses":
                def parse_line(toks):
                    return {"mass": float(toks[1])}
            else:
                warnings.warn("%s section parser has not been implemented. "
                              "Skipping..." % kw)
                return kw, []

            token_rows = [row.split() for row in sec_lines[1:]]
            if sort_id and kw != "PairIJ Coeffs":
                token_rows.sort(key=lambda toks: int(toks[0]))
            section = []
            for toks in token_rows:
                line_data = parse_line(toks)
                if kw != "PairIJ Coeffs":
                    line_data["id"] = int(toks[0])
                section.append(line_data)
            return kw, section

        err_msg = "Bad LAMMPS data format where "
        body = {}
        seen_atoms = False
        for part in parts[1:]:
            name, section = parse_section(part)
            if name == "Atoms":
                seen_atoms = True
            # Velocities and topology sections are only accepted after
            # the Atoms section.
            if name in ["Velocities"] + topo_sections and not seen_atoms:
                raise RuntimeError(err_msg + "%s section appears before"
                                   " Atoms section" % name)
            body[name] = section

        err_msg += "Nos. of {} do not match between header and {} section"
        assert len(body["Masses"]) == header["types"]["atom"], \
            err_msg.format("atom types", "Masses")
        atom_sections = ["Atoms"]
        if body.get("Velocities"):
            atom_sections.append("Velocities")
        for sec in atom_sections:
            assert len(body[sec]) == header["counts"]["atoms"], \
                err_msg.format("atoms", sec)
        for sec in topo_sections:
            if header["counts"].get(sec.lower(), 0) > 0:
                assert len(body[sec]) == header["counts"][sec.lower()], \
                    err_msg.format(sec.lower(), sec)

        items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
        items["box_bounds"] = header["bounds"]
        items["box_tilt"] = header.get("tilt")
        items["velocities"] = body.get("Velocities")

        ff_kws = [k for k in body if k in SECTION_KEYWORDS["ff"]]
        if ff_kws:
            items["force_field"] = {k: body[k] for k in ff_kws}
        else:
            items["force_field"] = None

        topo_kws = [k for k in body if k in SECTION_KEYWORDS["molecule"]]
        if topo_kws:
            items["topology"] = {k: body[k] for k in topo_kws}
        else:
            items["topology"] = None

        items["atom_style"] = atom_style
        return cls(**items)

Example #19

0

Show file

File: feffio.py Project: zacharygibbs/pymatgen

    def from_string(header_str):
        """
        Reads Header string and returns Header object if header was
        generated by pymatgen.

        Args:
            header_str: pymatgen generated feff.inp header

        Returns:
            Header object when the first line mentions "pymatgen";
            otherwise a message string saying that no header object can
            be returned.
        """
        # Checks to see if generated by pymatgen, if not it is impossible to
        # generate structure object so it is not possible to generate header
        # object and routine ends

        # NOTE(review): the False flag presumably keeps empty lines so the
        # fixed line offsets used below stay valid -- confirm against
        # clean_lines.
        lines = tuple(clean_lines(header_str.split("\n"), False))
        comment1 = lines[0]

        # Membership test rather than find() > 0, which wrongly rejected a
        # line that starts with "pymatgen" (index 0).
        if "pymatgen" not in comment1:
            return "Header not generated by pymatgen, " \
                   "cannot return header object"

        comment2 = ' '.join(lines[1].split()[2:])

        # This section gets information to create structure object

        source = ' '.join(lines[2].split()[2:])
        natoms = int(lines[8].split()[2])

        # Lattice lengths and angles are tokens 2-4 of lines 6 and 7.
        basis_vec = lines[6].split()
        lengths = [float(tok) for tok in basis_vec[2:5]]
        basis_ang = lines[7].split()
        angles = [float(tok) for tok in basis_ang[2:5]]

        lattice = Lattice.from_lengths_and_angles(lengths, angles)

        # One line per atom from line 9 on: token 2 is the symbol and the
        # tokens after it are the coordinates. range() replaces the
        # Python-2-only xrange() and works on both major versions.
        atomic_symbols = []
        coords = []
        for i in range(9, 9 + natoms):
            toks = lines[i].split()
            atomic_symbols.append(toks[2])
            coords.append([float(s) for s in toks[3:]])

        # Structure object is now generated and Header object returned

        struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                    False, False)

        return Header(struct_fromfile, source, comment2)

Example #20

0

Show file

File: pwscf.py Project: davidwaroquiers/pymatgen

    def from_string(string):
        """
        Reads an PWInput object from a string.

        Args:
            string (str): PWInput string

        Returns:
            PWInput object

        Raises:
            ValueError: If no ATOMIC_POSITIONS card with an {angstrom} or
                {crystal} option was found.
        """
        lines = list(clean_lines(string.splitlines()))

        def input_mode(line):
            # Classify a line. Lines that open no new block inherit the
            # mode of the previous line (read from the enclosing scope).
            if line[0] == "&":
                return ("sections", line[1:].lower())
            elif "ATOMIC_SPECIES" in line:
                return ("pseudo", )
            elif "K_POINTS" in line:
                return ("kpoints", line.split("{")[1][:-1])
            elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
                return ("structure", line.split("{")[1][:-1])
            elif line == "/":
                return None
            else:
                return mode

        sections = {"control": {}, "system": {}, "electrons": {},
                    "ions": {}, "cell": {}}
        pseudo = {}
        pseudo_index = 0
        lattice = []
        species = []
        coords = []
        site_properties = {"pseudo": []}
        # Only k-point settings actually found in the input are forwarded
        # to PWInput, so a missing K_POINTS card or grid line no longer
        # leaves a local name unbound at the return statement.
        # NOTE(review): assumes the kpoints_* parameters of PWInput are
        # optional -- confirm against its constructor.
        kpoints = {}
        coords_are_cartesian = None  # unknown until ATOMIC_POSITIONS is seen
        mode = None
        for line in lines:
            mode = input_mode(line)
            if mode is None:
                continue

            if mode[0] == "sections":
                section = mode[1]
                m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
                if m:
                    key = m.group(1).strip()
                    key_ = m.group(2).strip()
                    val = m.group(3).strip()
                    if key_ != "":
                        # Indexed key such as name(2): store the values in
                        # a fixed-size list addressed by the 1-based index.
                        value = PWInput.proc_val(key, val)
                        if sections[section].get(key) is None:
                            sections[section][key] = [0.0] * 20  # MAX NTYP DEFINITION
                            site_properties[key] = []
                        sections[section][key][int(key_) - 1] = value
                    else:
                        sections[section][key] = PWInput.proc_val(key, val)

            elif mode[0] == "pseudo":
                # Label, mass and pseudopotential file of one species.
                m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
                if m:
                    label = m.group(1).strip()
                    pseudo[label] = {"index": pseudo_index,
                                     "pseudopot": m.group(3).strip()}
                    pseudo_index += 1

            elif mode[0] == "kpoints":
                m = re.match(r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)', line)
                if m:
                    kpoints["kpoints_grid"] = (
                        int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    kpoints["kpoints_shift"] = (
                        int(m.group(4)), int(m.group(5)), int(m.group(6)))
                else:
                    kpoints["kpoints_mode"] = mode[1]

            elif mode[0] == "structure":
                if "ATOMIC_POSITIONS" in line:
                    # Only the ATOMIC_POSITIONS option describes the
                    # coordinates; the CELL_PARAMETERS option must not
                    # overwrite it.
                    if mode[1] == "angstrom":
                        coords_are_cartesian = True
                    elif mode[1] == "crystal":
                        coords_are_cartesian = False
                m_l = re.match(r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
                m_p = re.match(r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
                if m_l:
                    lattice += [float(m_l.group(1)), float(m_l.group(2)),
                                float(m_l.group(3))]
                elif m_p:
                    entry = pseudo[m_p.group(1)]
                    site_properties["pseudo"].append(entry["pseudopot"])
                    species.append(entry["pseudopot"].split(".")[0])
                    coords.append([float(m_p.group(2)), float(m_p.group(3)),
                                   float(m_p.group(4))])
                    for k in site_properties:
                        if k != "pseudo":
                            site_properties[k].append(
                                sections['system'][k][entry["index"]])

        if coords_are_cartesian is None:
            raise ValueError("Could not determine the coordinate type: no "
                             "ATOMIC_POSITIONS card with an {angstrom} or "
                             "{crystal} option was found.")

        structure = Structure(Lattice(lattice), species, coords,
                              coords_are_cartesian=coords_are_cartesian,
                              site_properties=site_properties)
        return PWInput(structure=structure, control=sections["control"],
                       system=sections["system"],
                       electrons=sections["electrons"],
                       ions=sections["ions"], cell=sections["cell"],
                       **kpoints)

Example #21

0

Show file

File: vasp_input.py Project: qimin/pymatgen

    def from_string(data, default_names=None):
        """
        Reads a Poscar from a string.

        The code will try its best to determine the elements in the POSCAR in
        the following order:
        1. If default_names are supplied and valid, it will use those. Usually,
        default names comes from an external source, such as a POTCAR in the
        same directory.
        2. If there are no valid default names but the input file is Vasp5-like
        and contains element symbols in the 6th line, the code will use that.
        3. Failing (2), the code will check if a symbol is provided at the end
        of each coordinate.

        If all else fails, the code will just assign the first n elements in
        increasing atomic number, where n is the number of species, to the
        Poscar. For example, H, He, Li, ....  This will ensure at least a
        unique element is assigned to each site and any analysis that does not
        require specific elemental properties should work fine.

        The string is split into chunks on blank lines: the first chunk holds
        the header, lattice and positions, an optional second chunk holds
        velocities, and an optional third chunk holds predictor-corrector
        data.

        Args:
            data:
                string containing Poscar data.
            default_names:
                default symbols for the POSCAR file, usually coming from a
                POTCAR in the same directory. If it has fewer entries than
                there are species, it is ignored.

        Returns:
            Poscar object.
        """

        # Raw string: "\s" is not a valid escape in a normal string literal.
        chunks = re.split(r"^\s*$", data.strip(), flags=re.MULTILINE)

        # Parse positions
        lines = tuple(clean_lines(chunks[0].split("\n"), False))
        comment = lines[0]
        scale = float(lines[1])
        # Build real lists (not lazy map objects) so numpy gets numeric rows.
        lattice = np.array([[float(tok) for tok in line.split()]
                            for line in lines[2:5]])
        if scale < 0:
            # In vasp, a negative scale factor is treated as a volume. We need
            # to translate this to a proper lattice vector scaling.
            vol = abs(det(lattice))
            # 1.0 / 3 keeps this a true cube root even under integer division.
            lattice *= (-scale / vol) ** (1.0 / 3)
        else:
            lattice *= scale

        vasp5_symbols = False
        try:
            # Eager conversion: a non-integer token must raise ValueError
            # here, inside the try, to select the Vasp5 branch below.
            natoms = [int(tok) for tok in lines[5].split()]
            ipos = 6
        except ValueError:
            # Line 6 holds element symbols (Vasp5 format); counts follow.
            vasp5_symbols = True
            symbols = lines[5].split()
            natoms = [int(tok) for tok in lines[6].split()]
            atomic_symbols = []
            for i, count in enumerate(natoms):
                atomic_symbols.extend([symbols[i]] * count)
            ipos = 7

        postype = lines[ipos].split()[0]

        sdynamics = False
        # Selective dynamics
        if postype[0] in "sS":
            sdynamics = True
            ipos += 1
            postype = lines[ipos].split()[0]

        cart = postype[0] in "cCkK"
        nsites = sum(natoms)

        # If default_names is specified (usually coming from a POTCAR), use
        # them. This is in line with Vasp's parsing order that the POTCAR
        # specified is the default used.
        if default_names:
            try:
                # Build into a scratch list so that a too-short default_names
                # does not clobber symbols already read from the file.
                named_symbols = []
                for i, count in enumerate(natoms):
                    named_symbols.extend([default_names[i]] * count)
            except IndexError:
                pass
            else:
                atomic_symbols = named_symbols
                vasp5_symbols = True

        if not vasp5_symbols:
            # Symbol column index: after x y z, or after x y z plus 3 flags.
            ind = 3 if not sdynamics else 6
            try:
                # Check if names are appended at the end of the coordinates.
                atomic_symbols = [l.split()[ind]
                                  for l in lines[ipos + 1:ipos + 1 + nsites]]
                # Ensure symbols are valid elements
                if not all(Element.is_valid_symbol(sym)
                           for sym in atomic_symbols):
                    raise ValueError("Non-valid symbols detected.")
                vasp5_symbols = True
            except (ValueError, IndexError):
                # Defaulting to false names.
                atomic_symbols = []
                for i, count in enumerate(natoms):
                    sym = Element.from_Z(i + 1).symbol
                    atomic_symbols.extend([sym] * count)
                warnings.warn("Elements in POSCAR cannot be determined. "
                              "Defaulting to false names {}."
                              .format(" ".join(atomic_symbols)))

        # read the atomic coordinates
        coords = []
        selective_dynamics = [] if sdynamics else None
        for i in range(nsites):
            toks = lines[ipos + 1 + i].split()
            coords.append([float(tok) for tok in toks[:3]])
            if sdynamics:
                # A flag counts as True when it starts with "T" or "t".
                selective_dynamics.append([tok.upper()[0] == "T"
                                           for tok in toks[3:6]])

        struct = Structure(lattice, atomic_symbols, coords, False, False, cart)

        # parse velocities if any
        velocities = []
        if len(chunks) > 1:
            for line in chunks[1].strip().split("\n"):
                velocities.append([float(tok) for tok in line.split()])

        # Third chunk: first line is a single int, remaining lines are floats.
        predictor_corrector = []
        if len(chunks) > 2:
            pc_lines = chunks[2].strip().split("\n")
            predictor_corrector.append([int(pc_lines[0])])
            for line in pc_lines[1:]:
                predictor_corrector.append([float(tok)
                                            for tok in line.split()])

        return Poscar(struct, comment, selective_dynamics, vasp5_symbols,
                      velocities=velocities,
                      predictor_corrector=predictor_corrector)