def from_string(cls, string: str):
    """
    Reads an Incar object from a string.

    In contrast to a plain one-flag-per-line INCAR parse, this method
    supports the ";" semantic: several flags may share a single line, e.g.
    ``ISMEAR = 0; SIGMA = 0.05``.

    Args:
        string (str): Incar string

    Returns:
        ViseIncar object
    """
    params = {}
    for line in clean_lines(string.splitlines()):
        # YK: Support the split of ";" semantic in INCAR file
        for sub_line in line.split(";"):
            # re.match is anchored at position 0 and "\w+" cannot consume
            # the blank that usually follows ";", so strip the fragment
            # first; otherwise every flag after the first one is dropped.
            m = re.match(r'(\w+)\s*=\s*(.*)', sub_line.strip())
            if m:
                key = m.group(1).strip()
                val = m.group(2).strip()
                params[key] = ViseIncar.proc_val(key, val)
    return cls(params)
def from_string(header_str):
    """
    Reads Header string and returns Header object if header was
    generated by pymatgen.

    Note: Checks to see if generated by pymatgen, if not it is impossible
    to generate structure object so it is not possible to generate
    header object and routine ends

    Args:
        header_str: pymatgen generated feff.inp header

    Returns:
        Header object.

    Raises:
        ValueError: If the first header line does not contain "pymatgen".
    """
    lines = tuple(clean_lines(header_str.split("\n"), False))
    comment1 = lines[0]
    # Membership test rather than str.find(): find() returns 0 for a match
    # at the very start of the line, which is falsy and used to be treated
    # as "not generated by pymatgen".
    if "pymatgen" not in comment1:
        raise ValueError(
            "Header not generated by pymatgen, cannot return header object")

    comment2 = " ".join(lines[1].split()[2:])
    source = " ".join(lines[2].split()[2:])

    # a, b, c: first three values after the last ":" of line 6
    basis_vec = lines[6].split(":")[-1].split()
    lengths = [float(basis_vec[i]) for i in range(3)]

    # alpha, beta, gamma: first three values after the last ":" of line 7
    basis_ang = lines[7].split(":")[-1].split()
    angles = [float(basis_ang[i]) for i in range(3)]

    lattice = Lattice.from_parameters(*lengths, *angles)

    natoms = int(lines[8].split(":")[-1].split()[0])

    # Site lines start at index 9: the third token is the symbol, every
    # token after it is a coordinate.
    atomic_symbols = []
    coords = []
    for i in range(9, 9 + natoms):
        toks = lines[i].split()
        atomic_symbols.append(toks[2])
        coords.append([float(s) for s in toks[3:]])

    struct = Structure(lattice, atomic_symbols, coords, False, False, False)
    return Header(struct, source, comment2)
def from_string(header_str):
    """
    Reads Header string and returns Header object if header was
    generated by pymatgen.

    Note: Checks to see if generated by pymatgen, if not it is impossible
    to generate structure object so it is not possible to generate
    header object and routine ends

    Args:
        header_str: pymatgen generated feff.inp header

    Returns:
        Header object, or the explanatory string
        "Header not generated by pymatgen, cannot return header object"
        when the first header line does not contain "pymatgen".
    """
    lines = tuple(clean_lines(header_str.split("\n"), False))
    comment1 = lines[0]
    # str.find() returns -1 (truthy) when the text is absent and 0 (falsy)
    # when it sits at the start of the line, so its result must not be used
    # as a boolean. A membership test expresses the intended check.
    if "pymatgen" not in comment1:
        # Kept as a returned string (not an exception) so that existing
        # callers that inspect the return value keep working.
        return "Header not generated by pymatgen, cannot return header object"

    comment2 = ' '.join(lines[1].split()[2:])
    source = ' '.join(lines[2].split()[2:])

    # a, b, c: first three values after the last ":" of line 6
    basis_vec = lines[6].split(":")[-1].split()
    lengths = [float(basis_vec[i]) for i in range(3)]

    # alpha, beta, gamma: first three values after the last ":" of line 7
    basis_ang = lines[7].split(":")[-1].split()
    angles = [float(basis_ang[i]) for i in range(3)]

    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    natoms = int(lines[8].split(":")[-1].split()[0])

    # Site lines start at index 9: the third token is the symbol, every
    # token after it is a coordinate.
    atomic_symbols = []
    coords = []
    for i in range(9, 9 + natoms):
        toks = lines[i].split()
        atomic_symbols.append(toks[2])
        coords.append([float(s) for s in toks[3:]])

    struct = Structure(lattice, atomic_symbols, coords, False, False, False)
    return Header(struct, source, comment2)
def from_string(header_str):
    """
    Reads Header string and returns Header object if header was
    generated by pymatgen.

    Args:
        header_str: pymatgen generated feff.inp header

    Returns:
        Header object, or the explanatory string
        "Header not generated by pymatgen, cannot return header object"
        when the first header line does not contain "pymatgen".
    """
    lines = tuple(clean_lines(header_str.split("\n"), False))
    comment = lines[0]
    # Membership test instead of ``find(...) > 0``, which wrongly rejected
    # a header whose first line starts with "pymatgen" (index 0).
    if "pymatgen" not in comment:
        return "Header not generated by pymatgen, " \
               "cannot return header object"

    source = lines[1].split()[2]
    natoms = int(lines[7].split()[2])

    # Lattice lengths and angles are tokens 2..4 of lines 5 and 6
    basis_vec = lines[5].split()
    lengths = [float(basis_vec[i]) for i in (2, 3, 4)]
    basis_ang = lines[6].split()
    angles = [float(basis_ang[i]) for i in (2, 3, 4)]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # Site lines start at index 8: the third token is the symbol, every
    # token after it is a coordinate. ``range`` replaces the Python-2-only
    # ``xrange`` so the method also runs on Python 3.
    atomic_symbols = []
    coords = []
    for i in range(8, 8 + natoms):
        toks = lines[i].split()
        atomic_symbols.append(toks[2])
        coords.append([float(s) for s in toks[3:]])

    struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                False, False)
    struct_fromfile.compound = lines[3].split()[2]

    h = Header(struct_fromfile, comment)
    h.set_source(source)
    return h
def from_file(filename="feff.inp"):
    """
    Build a Tags object from a PARAMETER or feff.inp file.

    Every line of the form ``KEY value...`` becomes an entry of the tag
    dictionary, except for the ATOMS, POTENTIALS, END and TITLE cards. An
    ELNES or EXELFS card is collected together with the lines that follow
    it and stored as a nested dictionary under the card name.

    Args:
        filename: Filename for either PARAMETER or feff.inp file

    Returns:
        Feff_tag object
    """
    with zopen(filename, "rt") as handle:
        cleaned = list(clean_lines(handle.readlines()))

    skipped_cards = ("ATOMS", "POTENTIALS", "END", "TITLE")
    params = {}
    eels_lines = []
    first = -1  # index of the ELNES/EXELFS card, -1 while none was seen
    last = -1   # index of the last line belonging to that card

    for idx, text in enumerate(cleaned):
        found = re.match(r"([A-Z]+\d*\d*)\s*(.*)", text)
        if found:
            key = found.group(1).strip()
            val = Tags.proc_val(key, found.group(2).strip())
            if key in ("ELNES", "EXELFS"):
                first, last = idx, idx + 5
            elif key not in skipped_cards:
                params[key] = val

        # Lines outside the EELS window need no further treatment.
        if first < 0 or not (first <= idx <= last):
            continue
        # A second token equal to 1 on the line right after the card
        # shortens the window by one line.
        if idx == first + 1 and int(text.split()[1]) == 1:
            last -= 1
        eels_lines.append(text)

    if eels_lines:
        card = eels_lines[0].split()
        if len(eels_lines) == 6:
            names = [
                "BEAM_ENERGY",
                "BEAM_DIRECTION",
                "ANGLES",
                "MESH",
                "POSITION",
            ]
        else:
            names = ["BEAM_ENERGY", "ANGLES", "MESH", "POSITION"]
        eels = {"ENERGY": Tags._stringify_val(card[1:])}
        eels.update((name, str(row)) for name, row in zip(names, eels_lines[1:]))
        params[str(card[0])] = eels

    return Tags(params)
def from_file(filename="feff.inp"):
    """
    Parse a PARAMETER or feff.inp file into a Tags object.

    Regular ``KEY value...`` lines are stored directly (ATOMS, POTENTIALS,
    END and TITLE are left out). The lines belonging to an ELNES / EXELFS
    card are gathered separately and stored as one nested dictionary under
    the card name.

    Args:
        filename: Filename for either PARAMETER or feff.inp file

    Returns:
        Feff_tag object
    """
    with zopen(filename, "rt") as fh:
        text_lines = list(clean_lines(fh.readlines()))

    tag_regex = r"([A-Z]+\d*\d*)\s*(.*)"
    params, eels_params = {}, []
    ieels = ieels_max = -1

    for lineno, line in enumerate(text_lines):
        m = re.match(tag_regex, line)
        if m is not None:
            key, raw = m.group(1).strip(), m.group(2).strip()
            val = Tags.proc_val(key, raw)
            if key in ["ELNES", "EXELFS"]:
                # Card line itself plus up to five lines after it
                ieels, ieels_max = lineno, lineno + 5
            elif key not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                params[key] = val

        inside_eels = ieels >= 0 and ieels <= lineno <= ieels_max
        if inside_eels:
            if lineno == ieels + 1 and int(line.split()[1]) == 1:
                # One line fewer belongs to the card in this case
                ieels_max -= 1
            eels_params.append(line)

    if not eels_params:
        return Tags(params)

    card_tokens = eels_params[0].split()
    eels_keys = ['BEAM_ENERGY', 'ANGLES', 'MESH', 'POSITION']
    if len(eels_params) == 6:
        eels_keys.insert(1, 'BEAM_DIRECTION')

    eels_dict = {"ENERGY": Tags._stringify_val(card_tokens[1:])}
    for k, v in zip(eels_keys, eels_params[1:]):
        eels_dict[k] = str(v)
    params[str(card_tokens[0])] = eels_dict

    return Tags(params)
def from_string(string):
    """
    Build an Incar object from the text of an INCAR file.

    Each cleaned line of the form ``KEY = value`` contributes one
    parameter; the raw value is converted with Incar.proc_val. Lines that
    do not have that form are ignored.

    Args:
        string (str): Incar string

    Returns:
        Incar object
    """
    cleaned = list(clean_lines(string.splitlines()))
    params = {}
    for entry in cleaned:
        hit = re.match(r'(\w+)\s*=\s*(.*)', entry)
        if not hit:
            continue
        name = hit.group(1).strip()
        raw = hit.group(2).strip()
        params[name] = Incar.proc_val(name, raw)
    return Incar(params)
def from_file(filename):
    """
    Reads an Incar object from a file.

    Each cleaned line of the form ``KEY = value`` contributes one
    parameter; the raw value is converted with Incar.proc_val.

    Args:
        filename: Filename for file

    Returns:
        Incar object
    """
    with zopen(filename, "r") as f:
        lines = list(clean_lines(f.readlines()))
    params = {}
    for line in lines:
        # Raw string literal: "\w" and "\s" are invalid escape sequences in
        # a plain string and make Python emit a warning. The compiled
        # pattern is unchanged.
        m = re.match(r"(\w+)\s*=\s*(.*)", line)
        if m:
            key = m.group(1).strip()
            val = m.group(2).strip()
            params[key] = Incar.proc_val(key, val)
    return Incar(params)
def from_file(filename="feff.inp"):
    """
    Creates a Feff_tag dictionary from a PARAMETER or feff.inp file.

    Every cleaned line of the form ``KEY value...`` is stored under KEY
    after conversion with FeffTags.proc_val; the ATOMS, POTENTIALS, END
    and TITLE cards are left out.

    Args:
        filename: Filename for either PARAMETER or feff.inp file

    Returns:
        Feff_tag object
    """
    with zopen(filename, "r") as f:
        lines = list(clean_lines(f.readlines()))
    params = {}
    for line in lines:
        # Raw string literal: "\d" and "\s" are invalid escape sequences in
        # a plain string and make Python emit a warning. The compiled
        # pattern is unchanged.
        m = re.match(r"([A-Z]+\d*\d*)\s*(.*)", line)
        if m:
            key = m.group(1).strip()
            val = m.group(2).strip()
            val = FeffTags.proc_val(key, val)
            if key not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                params[key] = val
    return FeffTags(params)
def from_file(filename="feff.inp"):
    """
    Creates a Feff_tag dictionary from a PARAMETER or feff.inp file.

    Every cleaned line of the form ``KEY value...`` is stored under KEY
    after conversion with Tags.proc_val; the ATOMS, POTENTIALS, END and
    TITLE cards are left out.

    Args:
        filename: Filename for either PARAMETER or feff.inp file

    Returns:
        Feff_tag object
    """
    with zopen(filename, "rt") as f:
        lines = list(clean_lines(f.readlines()))
    params = {}
    for line in lines:
        # Raw string literal: "\d" and "\s" are invalid escape sequences in
        # a plain string and make Python emit a warning. The compiled
        # pattern is unchanged.
        m = re.match(r"([A-Z]+\d*\d*)\s*(.*)", line)
        if m:
            key = m.group(1).strip()
            val = m.group(2).strip()
            val = Tags.proc_val(key, val)
            if key not in ("ATOMS", "POTENTIALS", "END", "TITLE"):
                params[key] = val
    return Tags(params)
def from_string(string):
    """
    Reads a PWInput object from a string.

    The cleaned lines are scanned once, in order. For every line the nested
    ``input_mode`` helper decides which part of the input it belongs to
    ("&" namelist sections, ATOMIC_SPECIES, K_POINTS, CELL_PARAMETERS /
    ATOMIC_POSITIONS), and the line is then parsed by the matching branch.

    Args:
        string (str): PWInput string

    Returns:
        PWInput object
    """
    lines = list(clean_lines(string.splitlines()))

    def input_mode(line):
        # Returns a tuple whose first item names the kind of block the line
        # belongs to, or None for the "/" line.
        if line[0] == "&":
            return ("sections", line[1:].lower())
        elif "ATOMIC_SPECIES" in line:
            return ("pseudo", )
        elif "K_POINTS" in line:
            # Second item: text after the first "{", last character dropped.
            # NOTE(review): raises IndexError if the line has no "{".
            return ("kpoints", line.split("{")[1][:-1])
        elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
            return ("structure", line.split("{")[1][:-1])
        elif line == "/":
            return None
        else:
            # Not a header line: stay in the mode currently held by the
            # enclosing function's ``mode`` variable.
            return mode

    sections = {
        "control": {},
        "system": {},
        "electrons": {},
        "ions": {},
        "cell": {}
    }
    pseudo = {}
    pseudo_index = 0
    lattice = []
    species = []
    coords = []
    structure = None
    site_properties = {"pseudo": []}
    mode = None
    for line in lines:
        mode = input_mode(line)
        if mode is None:
            pass
        elif mode[0] == "sections":
            section = mode[1]
            # key, optional "(index)", "=", value
            m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
            if m:
                key = m.group(1).strip()
                key_ = m.group(2).strip()
                val = m.group(3).strip()
                if key_ != "":
                    # Indexed key: values go into a 20-slot list at
                    # position index - 1, and the key is registered as a
                    # site property.
                    if sections[section].get(key, None) is None:
                        val_ = [0.0] * 20  # MAX NTYP DEFINITION
                        val_[int(key_) - 1] = PWInput.proc_val(key, val)
                        sections[section][key] = val_
                        site_properties[key] = []
                    else:
                        sections[section][key][int(key_) - 1] = PWInput.proc_val(
                            key, val)
                else:
                    sections[section][key] = PWInput.proc_val(key, val)
        elif mode[0] == "pseudo":
            # name, a second field, then the pseudopotential name.
            # NOTE(review): the "." in the second group is unescaped and so
            # matches any character - confirm that is intended.
            m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
            if m:
                pseudo[m.group(1).strip()] = {}
                pseudo[m.group(1).strip()]["index"] = pseudo_index
                pseudo[m.group(1).strip()]["pseudopot"] = m.group(
                    3).strip()
                pseudo_index += 1
        elif mode[0] == "kpoints":
            # A line of six integers gives grid (first three) and shift
            # (last three); any other line in this mode records the option.
            m = re.match(r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)',
                         line)
            if m:
                kpoints_grid = (int(m.group(1)), int(m.group(2)),
                                int(m.group(3)))
                kpoints_shift = (int(m.group(4)), int(m.group(5)),
                                 int(m.group(6)))
            else:
                kpoints_mode = mode[1]
        elif mode[0] == "structure":
            # m_l: a line starting with three numbers (lattice row);
            # m_p: a name followed by three numbers (atomic position).
            m_l = re.match(
                r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            m_p = re.match(
                r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            if m_l:
                # Lattice values are accumulated as one flat list
                lattice += [
                    float(m_l.group(1)),
                    float(m_l.group(2)),
                    float(m_l.group(3))
                ]
            elif m_p:
                site_properties["pseudo"].append(
                    pseudo[m_p.group(1)]["pseudopot"])
                # Species name: pseudopotential name up to the first "."
                species += [
                    pseudo[m_p.group(1)]["pseudopot"].split(".")[0]
                ]
                coords += [[
                    float(m_p.group(2)),
                    float(m_p.group(3)),
                    float(m_p.group(4))
                ]]

                # Copy every indexed "system" value of this species onto
                # the site.
                for k, v in site_properties.items():
                    if k != "pseudo":
                        site_properties[k].append(sections['system'][k][
                            pseudo[m_p.group(1)]["index"]])
            if mode[1] == "angstrom":
                coords_are_cartesian = True
            elif mode[1] == "crystal":
                coords_are_cartesian = False

    # NOTE(review): coords_are_cartesian, kpoints_mode, kpoints_grid and
    # kpoints_shift are only assigned inside the branches above; an input
    # that never reaches them fails here with an unbound local name.
    structure = Structure(Lattice(lattice), species, coords,
                          coords_are_cartesian=coords_are_cartesian,
                          site_properties=site_properties)
    return PWInput(structure=structure, control=sections["control"],
                   system=sections["system"],
                   electrons=sections["electrons"],
                   ions=sections["ions"], cell=sections["cell"],
                   kpoints_mode=kpoints_mode,
                   kpoints_grid=kpoints_grid,
                   kpoints_shift=kpoints_shift)
def from_file(cls, filename, atom_style="full", sort_id=False):
    """
    Constructor from parsing a file.

    The cleaned lines are split at every line that equals one of the
    SECTION_KEYWORDS entries. The part before the first keyword is parsed
    as the header (counts, types, box bounds, tilt); each following part is
    parsed as one section into a list of per-line dictionaries.

    Args:
        filename (str): Filename to read.
        atom_style (str): Associated atom_style. Default to "full".
        sort_id (bool): Whether sort each section by id. Default to
            False.

    Returns:
        Instance of cls built from the parsed items.

    Raises:
        RuntimeError: If a Velocities or topology section appears before
            the Atoms section.
        AssertionError: If a section length disagrees with the header.
    """
    with open(filename) as f:
        lines = f.readlines()
    clines = list(clean_lines(lines))
    # Indices of the lines that are exactly a section keyword
    section_marks = [i for i, l in enumerate(clines) if l
                     in itertools.chain(*SECTION_KEYWORDS.values())]
    parts = np.split(clines, section_marks)

    # First, parse header
    float_group = r'([0-9eE.+-]+)'
    header_pattern = {}
    header_pattern["counts"] = r'^\s*(\d+)\s+([a-zA-Z]+)$'
    header_pattern["types"] = r'^\s*(\d+)\s+([a-zA-Z]+)\s+types$'
    # "\3" forces the same axis letter in "<axis>lo <axis>hi"
    header_pattern["bounds"] = r'^\s*{}$'.format(r'\s+'.join(
        [float_group] * 2 + [r"([xyz])lo \3hi"]))
    header_pattern["tilt"] = r'^\s*{}$'.format(r'\s+'.join(
        [float_group] * 3 + ["xy xz yz"]))

    header = {"counts": {}, "types": {}}
    bounds = {}
    for l in parts[0]:
        match = None
        # Stop at the first pattern that matches; k then names its kind
        for k, v in header_pattern.items():
            match = re.match(v, l)
            if match:
                break
            else:
                continue
        if match and k in ["counts", "types"]:
            header[k][match.group(2)] = int(match.group(1))
        elif match and k == "bounds":
            g = match.groups()
            bounds[g[2]] = [float(i) for i in g[:2]]
        elif match and k == "tilt":
            header["tilt"] = [float(i) for i in match.groups()]
    # Axes without a bounds line fall back to [-0.5, 0.5]
    header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]

    # Then, parse each section
    topo_sections = SECTION_KEYWORDS["molecule"]

    def parse_section(single_section_lines):
        # First line is the section keyword; it selects the per-line parser
        kw = single_section_lines[0]

        if kw in SECTION_KEYWORDS["ff"] and kw != "PairIJ Coeffs":
            parse_line = lambda l: {"coeffs": [literal_eval(x)
                                               for x in l[1:]]}
        elif kw == "PairIJ Coeffs":
            parse_line = lambda l: {"id1": int(l[0]), "id2": int(l[1]),
                                    "coeffs": [literal_eval(x)
                                               for x in l[2:]]}
        elif kw in topo_sections:
            # Number of atom ids per entry for each topology keyword
            n = {"Bonds": 2, "Angles": 3, "Dihedrals": 4, "Impropers": 4}
            # The key is the keyword in singular, lower case
            parse_line = lambda l: {"type": int(l[1]), kw[:-1].lower():
                                    [int(x) for x in l[2:n[kw] + 2]]}
        elif kw == "Atoms":
            # Copy, so that the shared format list is not extended in place
            keys = ATOMS_LINE_FORMAT[atom_style][:]
            # The first data line decides whether image flags are present
            sample_l = single_section_lines[1].split()
            if len(sample_l) == len(keys) + 1:
                pass
            elif len(sample_l) == len(keys) + 4:
                keys += ["nx", "ny", "nz"]
            else:
                warnings.warn("Atoms section format might be imcompatible"
                              " with atom_style %s." % atom_style)
            float_keys = [k for k in keys if k in ATOMS_FLOATS]
            parse_line = lambda l: {k: float(v) if k in float_keys
                                    else int(v) for (k, v)
                                    in zip(keys, l[1:len(keys) + 1])}
        elif kw == "Velocities":
            parse_line = lambda l: {"velocity": [float(x)
                                                 for x in l[1:4]]}
        elif kw == "Masses":
            parse_line = lambda l: {"mass": float(l[1])}
        else:
            warnings.warn("%s section parser has not been implemented. "
                          "Skipping..." % kw)
            return kw, []

        section = []
        splitted_lines = [l.split() for l in single_section_lines[1:]]
        if sort_id and kw != "PairIJ Coeffs":
            splitted_lines = sorted(splitted_lines,
                                    key=lambda l: int(l[0]))
        for l in splitted_lines:
            line_data = parse_line(l)
            # Every section except PairIJ Coeffs carries the first column
            # as "id"
            if kw != "PairIJ Coeffs":
                line_data["id"] = int(l[0])
            section.append(line_data)
        return kw, section

    err_msg = "Bad LAMMPS data format where "
    body = {}
    seen_atoms = False
    for part in parts[1:]:
        name, section = parse_section(part)
        if name == "Atoms":
            seen_atoms = True
        if name in ["Velocities"] + topo_sections and not seen_atoms:
            raise RuntimeError(err_msg + "%s section appears before"
                                         " Atoms section" % name)
        body.update({name: section})

    # Cross-check section lengths against the header counts
    err_msg += "Nos. of {} do not match between header and {} section"
    assert len(body["Masses"]) == header["types"]["atom"], \
        err_msg.format("atom types", "Masses")
    atom_sections = ["Atoms", "Velocities"] \
        if body.get("Velocities") else ["Atoms"]
    for s in atom_sections:
        assert len(body[s]) == header["counts"]["atoms"], \
            err_msg.format("atoms", s)
    for s in topo_sections:
        if header["counts"].get(s.lower(), 0) > 0:
            assert len(body[s]) == header["counts"][s.lower()], \
                err_msg.format(s.lower(), s)

    # Assemble the keyword arguments passed to cls
    items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
    items["box_bounds"] = header["bounds"]
    items["box_tilt"] = header.get("tilt")
    items["velocities"] = body.get("Velocities")
    ff_kws = [k for k in body.keys() if k in SECTION_KEYWORDS["ff"]]
    items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
        else None
    topo_kws = [k for k in body.keys()
                if k in SECTION_KEYWORDS["molecule"]]
    items["topology"] = {k: body[k] for k in topo_kws} \
        if topo_kws else None
    items["atom_style"] = atom_style
    return cls(**items)
def from_string(data, default_names=None, read_velocities=True):
    """
    Reads a Poscar from a string.

    The code will try its best to determine the elements in the POSCAR in
    the following order:
    1. If default_names are supplied and valid, it will use those. Usually,
    default names comes from an external source, such as a POTCAR in the
    same directory.
    2. If there are no valid default names but the input file is Vasp5-like
    and contains element symbols in the 6th line, the code will use that.
    3. Failing (2), the code will check if a symbol is provided at the end
    of each coordinate.

    If all else fails, the code will just assign the first n elements in
    increasing atomic number, where n is the number of species, to the
    Poscar. For example, H, He, Li, .... This will ensure at least a
    unique element is assigned to each site and any analysis that does not
    require specific elemental properties should work fine.

    Args:
        data (str): String containing Poscar data.
        default_names ([str]): Default symbols for the POSCAR file,
            usually coming from a POTCAR in the same directory.
        read_velocities (bool): Whether to read or not velocities if they
            are present in the POSCAR. Default is True.

    Returns:
        Poscar object.

    Raises:
        ValueError: "Empty POSCAR" if the data holds no non-blank chunk.
    """
    # "^\s*$" doesn't match lines with no whitespace
    # Chunks are separated by blank lines: positions, then velocities, then
    # predictor-corrector data.
    chunks = re.split(r"\n\s*\n", data.rstrip(), flags=re.MULTILINE)
    try:
        # An empty first chunk is dropped and the next chunk gets a leading
        # newline, so that it starts with an empty line.
        if chunks[0] == "":
            chunks.pop(0)
            chunks[0] = "\n" + chunks[0]
    except IndexError:
        raise ValueError("Empty POSCAR")

    # Parse positions
    lines = tuple(clean_lines(chunks[0].split("\n"), False))
    comment = lines[0]
    scale = float(lines[1])
    lattice = np.array([[float(i) for i in line.split()]
                        for line in lines[2:5]])
    if scale < 0:
        # In vasp, a negative scale factor is treated as a volume. We need
        # to translate this to a proper lattice vector scaling.
        vol = abs(det(lattice))
        lattice *= (-scale / vol)**(1 / 3)
    else:
        lattice *= scale

    vasp5_symbols = False
    try:
        # Line 6 holds the atom counts when it consists of integers only
        natoms = [int(i) for i in lines[5].split()]
        ipos = 6
    except ValueError:
        # Otherwise line 6 holds element symbols
        vasp5_symbols = True
        symbols = lines[5].split()

        """
        Atoms and number of atoms in POSCAR written with vasp appear on
        multiple lines when atoms of the same type are not grouped together
        and more than 20 groups are then defined ...
        Example :
        Cr16 Fe35 Ni2
           1.00000000000000
             8.5415010000000002   -0.0077670000000000   -0.0007960000000000
            -0.0077730000000000    8.5224019999999996    0.0105580000000000
            -0.0007970000000000    0.0105720000000000    8.5356889999999996
           Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Cr   Fe   Ni   Fe   Cr   Fe   Cr
           Fe   Ni   Fe   Cr   Fe
             1     1     2     4     2     1     1     1     2     1     1     1     4     1     1     1     5     3     6     1
             2     1     3     2     5
        Direct
          ...
        """
        # Count the symbol lines: stop at the first following line whose
        # first token is an integer.
        nlines_symbols = 1
        for nlines_symbols in range(1, 11):
            try:
                int(lines[5 + nlines_symbols].split()[0])
                break
            except ValueError:
                pass
        for iline_symbols in range(6, 5 + nlines_symbols):
            symbols.extend(lines[iline_symbols].split())
        # The counts span as many lines as the symbols did
        natoms = []
        iline_natoms_start = 5 + nlines_symbols
        for iline_natoms in range(iline_natoms_start,
                                  iline_natoms_start + nlines_symbols):
            natoms.extend([int(i) for i in lines[iline_natoms].split()])
        atomic_symbols = list()
        for i in range(len(natoms)):
            atomic_symbols.extend([symbols[i]] * natoms[i])
        ipos = 5 + 2 * nlines_symbols

    postype = lines[ipos].split()[0]

    sdynamics = False
    # Selective dynamics
    if postype[0] in "sS":
        sdynamics = True
        ipos += 1
        postype = lines[ipos].split()[0]

    # Coordinate type line starting with c, C, k or K means Cartesian
    cart = postype[0] in "cCkK"
    nsites = sum(natoms)

    # If default_names is specified (usually coming from a POTCAR), use
    # them. This is in line with Vasp's parsing order that the POTCAR
    # specified is the default used.
    if default_names:
        try:
            atomic_symbols = []
            for i in range(len(natoms)):
                atomic_symbols.extend([default_names[i]] * natoms[i])
            vasp5_symbols = True
        except IndexError:
            # Fewer default names than species: fall through to the other
            # ways of determining the symbols.
            pass

    if not vasp5_symbols:
        # Column of a trailing symbol: after 3 coordinates, or after
        # 3 coordinates plus 3 selective-dynamics flags.
        ind = 3 if not sdynamics else 6
        try:
            # Check if names are appended at the end of the coordinates.
            atomic_symbols = [l.split()[ind]
                              for l in lines[ipos + 1:ipos + 1 + nsites]]
            # Ensure symbols are valid elements
            if not all([Element.is_valid_symbol(sym)
                        for sym in atomic_symbols]):
                raise ValueError("Non-valid symbols detected.")
            vasp5_symbols = True
        except (ValueError, IndexError):
            # Defaulting to false names.
            atomic_symbols = []
            for i in range(len(natoms)):
                sym = Element.from_Z(i + 1).symbol
                atomic_symbols.extend([sym] * natoms[i])
            warnings.warn("Elements in POSCAR cannot be determined. "
                          "Defaulting to false names %s." %
                          " ".join(atomic_symbols))

    # read the atomic coordinates
    coords = []
    selective_dynamics = list() if sdynamics else None
    for i in range(nsites):
        toks = lines[ipos + 1 + i].split()
        # NOTE(review): this uses the raw ``scale`` read from line 2, which
        # is negative when it encodes a volume - confirm that this is the
        # intended factor for Cartesian input in that case.
        crd_scale = scale if cart else 1
        coords.append([float(j) * crd_scale for j in toks[:3]])
        if sdynamics:
            # A flag counts as True when it starts with "T" (any case)
            selective_dynamics.append([tok.upper()[0] == "T"
                                       for tok in toks[3:6]])

    if read_velocities:
        # Parse velocities if any
        velocities = []
        if len(chunks) > 1:
            for line in chunks[1].strip().split("\n"):
                velocities.append([float(tok) for tok in line.split()])

        # Parse the predictor-corrector data
        predictor_corrector = []
        predictor_corrector_preamble = None

        if len(chunks) > 2:
            lines = chunks[2].strip().split("\n")
            # There are 3 sets of 3xN Predictor corrector parameters
            # So can't be stored as a single set of "site_property"

            # First line in chunk is a key in CONTCAR
            # Second line is POTIM
            # Third line is the thermostat parameters
            predictor_corrector_preamble = (lines[0] + "\n" + lines[1]
                                            + "\n" + lines[2])
            # Rest is three sets of parameters, each set contains
            # x, y, z predictor-corrector parameters for every atom in
            # order
            lines = lines[3:]
            for st in range(nsites):
                d1 = [float(tok) for tok in lines[st].split()]
                d2 = [float(tok) for tok in lines[st + nsites].split()]
                d3 = [float(tok) for tok in lines[st + 2 * nsites].split()]
                predictor_corrector.append([d1, d2, d3])
    else:
        velocities = None
        predictor_corrector = None
        predictor_corrector_preamble = None

    return Poscar_new(
        atomic_symbols,
        coords,
        lattice,
        comment,
        selective_dynamics,
        vasp5_symbols,
        velocities=velocities,
        predictor_corrector=predictor_corrector,
        predictor_corrector_preamble=predictor_corrector_preamble)
def from_file(cls, filename, atom_style="full", sort_id=False):
    """
    Constructor that parses a file.

    The file is split at every line that contains one of the
    SECTION_KEYWORDS entries. The part before the first keyword is parsed
    as the header (counts, types, box bounds, tilt); each following part is
    read into a pandas DataFrame.

    Args:
        filename (str): Filename to read.
        atom_style (str): Associated atom_style. Default to "full".
        sort_id (bool): Whether sort each section by id. Default to
            False.

    Returns:
        Instance of cls built from the parsed items.

    Raises:
        ValueError: If the Atoms section does not have the number of
            columns expected for atom_style.
        NotImplementedError: If a section keyword has no parser.
        RuntimeError: If a Velocities or topology section appears before
            the Atoms section.
        AssertionError: If a section length disagrees with the header.
    """
    with open(filename) as f:
        lines = f.readlines()
    kw_pattern = r"|".join(itertools.chain(*SECTION_KEYWORDS.values()))
    section_marks = [i for i, line in enumerate(lines)
                     if re.search(kw_pattern, line)]
    parts = np.split(lines, section_marks)

    float_group = r"([0-9eE.+-]+)"
    header_pattern = dict()
    header_pattern["counts"] = r"^\s*(\d+)\s+([a-zA-Z]+)$"
    header_pattern["types"] = r"^\s*(\d+)\s+([a-zA-Z]+)\s+types$"
    # "\3" forces the same axis letter in "<axis>lo <axis>hi"
    header_pattern["bounds"] = r"^\s*{}$".format(r"\s+".join(
        [float_group] * 2 + [r"([xyz])lo \3hi"]))
    header_pattern["tilt"] = r"^\s*{}$".format(r"\s+".join(
        [float_group] * 3 + ["xy xz yz"]))

    header = {"counts": {}, "types": {}}
    bounds = {}
    for line in clean_lines(parts[0][1:]):  # skip the 1st line
        # The first pattern that matches decides what the line describes;
        # lines matching none of them are ignored.
        for kind, pattern in header_pattern.items():
            match = re.match(pattern, line)
            if match:
                break
        else:
            continue
        if kind in ("counts", "types"):
            header[kind][match.group(2)] = int(match.group(1))
        elif kind == "bounds":
            g = match.groups()
            bounds[g[2]] = [float(i) for i in g[:2]]
        elif kind == "tilt":
            header["tilt"] = [float(i) for i in match.groups()]
    # Axes without a bounds line fall back to [-0.5, 0.5]
    header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]

    def parse_section(sec_lines):
        # The keyword is the title line without its trailing "#" comment
        title_info = sec_lines[0].split("#", 1)
        kw = title_info[0].strip()
        sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True and parses identically.
        df = pd.read_csv(sio, header=None, comment="#", sep=r"\s+")
        if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
            names = ["id"] + ["coeff%d" % i
                              for i in range(1, df.shape[1])]
        elif kw == "PairIJ Coeffs":
            names = ["id1", "id2"] + ["coeff%d" % i
                                      for i in range(1, df.shape[1] - 1)]
            df.index.name = None
        elif kw in SECTION_HEADERS:
            names = ["id"] + SECTION_HEADERS[kw]
        elif kw == "Atoms":
            names = ["id"] + ATOMS_HEADERS[atom_style]
            if df.shape[1] == len(names):
                pass
            elif df.shape[1] == len(names) + 3:
                # Three extra columns are taken as the image flags
                names += ["nx", "ny", "nz"]
            else:
                raise ValueError("Format in Atoms section inconsistent"
                                 " with atom_style %s" % atom_style)
        else:
            raise NotImplementedError("Parser for %s section"
                                      " not implemented" % kw)
        df.columns = names
        if sort_id:
            sort_by = "id" if kw != "PairIJ Coeffs" else ["id1", "id2"]
            df.sort_values(sort_by, inplace=True)
        if "id" in df.columns:
            df.set_index("id", drop=True, inplace=True)
            df.index.name = None
        return kw, df

    err_msg = "Bad LAMMPS data format where "
    body = {}
    seen_atoms = False
    for part in parts[1:]:
        name, section = parse_section(part)
        if name == "Atoms":
            seen_atoms = True
        if name in ["Velocities"] + SECTION_KEYWORDS["topology"] and \
                not seen_atoms:  # Atoms must appear earlier than these
            raise RuntimeError(err_msg + "%s section appears before"
                                         " Atoms section" % name)
        body[name] = section

    # Cross-check section lengths against the header counts
    err_msg += "Nos. of {} do not match between header and {} section"
    assert len(body["Masses"]) == header["types"]["atom"], \
        err_msg.format("atom types", "Masses")
    atom_sections = ["Atoms", "Velocities"] \
        if "Velocities" in body else ["Atoms"]
    for s in atom_sections:
        assert len(body[s]) == header["counts"]["atoms"], \
            err_msg.format("atoms", s)
    for s in SECTION_KEYWORDS["topology"]:
        if header["counts"].get(s.lower(), 0) > 0:
            assert len(body[s]) == header["counts"][s.lower()], \
                err_msg.format(s.lower(), s)

    # Assemble the keyword arguments passed to cls
    items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
    items["box_bounds"] = header["bounds"]
    items["box_tilt"] = header.get("tilt")
    items["velocities"] = body.get("Velocities")
    ff_kws = [k for k in body if k
              in SECTION_KEYWORDS["ff"] + SECTION_KEYWORDS["class2"]]
    items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
        else None
    topo_kws = [k for k in body if k in SECTION_KEYWORDS["topology"]]
    items["topology"] = {k: body[k] for k in topo_kws} \
        if topo_kws else None
    items["atom_style"] = atom_style
    return cls(**items)
def struct_import_pymatgen(filename,ubin=""):
    """
    import structure from QE input/output into Pymatgen structure
    adapted from pymatgen native PWio method pymatgen.io.pwscf
    from_file and from_string methods

    DOES NOT WORK ATM

    filename: (string) input/output QE file containing structure
    ubin: (string) dummy variable; for consistency with remaining scripts

    Returns the Structure built from the parsed lattice, species and
    coordinates.

    NOTE(review): visible reasons why this cannot work as written:
      * the ATOMIC_SPECIES branch is commented out, so ``pseudo`` stays
        empty and every ``pseudo[...]`` lookup in the structure branch
        raises KeyError as soon as a position line matches;
      * ``coords_are_cartesian`` is only assigned when the option is
        "angstrom" or "crystal";
      * ``Structure`` and ``Lattice`` are not imported inside the function
        and must come from module scope - not visible here, confirm.
    """
    import re
    from monty.io import zopen
    # NOTE(review): regrep is imported but not used in this function
    from monty.re import regrep
    from pymatgen.util.io_utils import clean_lines
    from pymatgen.io.pwscf import PWInput

    print("# Reading atomic coordinates from: ", filename)
    with zopen(filename, "rt") as f:
        string = f.read()

    lines = list(clean_lines(string.splitlines()))
    # Prints the whole list of cleaned lines
    print(lines)

    def input_mode(line):
        # Returns a tuple whose first item names the kind of block the line
        # belongs to, or None for the "/" line.
        if line[0] == "&":
            return ("sections", line[1:].lower())
        elif "ATOMIC_SPECIES" in line:
            return ("pseudo", )
        elif "K_POINTS" in line:
            # NOTE(review): raises IndexError if the line has no "{"
            return ("kpoints", line.split("{")[1][:-1])
        elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
            return ("structure", line.split("{")[1][:-1])
        elif line == "/":
            return None
        else:
            # Not a header line: stay in the mode currently held by the
            # enclosing function's ``mode`` variable.
            return mode

    sections = {"control": {}, "system": {}, "electrons": {},
                "ions": {}, "cell":{}}
    pseudo = {}
    pseudo_index = 0
    lattice = []
    species = []
    coords = []
    structure = None
    site_properties = {"pseudo":[]}
    mode = None
    for line in lines:
        mode = input_mode(line)
        if mode == None:
            pass
        elif mode[0] == "sections":
            section = mode[1]
            # key, optional "(index)", "=", value
            m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
            if m:
                key = m.group(1).strip()
                key_ = m.group(2).strip()
                val = m.group(3).strip()
                if key_ != "":
                    # Indexed key: values go into a 20-slot list at
                    # position index - 1, and the key is registered as a
                    # site property.
                    if sections[section].get(key, None) == None:
                        val_ = [0.0]*20 # MAX NTYP DEFINITION
                        val_[int(key_)-1] = PWInput.proc_val(key, val)
                        sections[section][key] = val_
                        site_properties[key] = []
                    else:
                        sections[section][key][int(key_)-1] = PWInput.proc_val(key, val)
                else:
                    sections[section][key] = PWInput.proc_val(key, val)

        # Disabled branches: pseudopotential and k-point parsing.
        # elif mode[0] == "pseudo":
        #     m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
        #     if m:
        #         pseudo[m.group(1).strip()] = {}
        #         pseudo[m.group(1).strip()]["index"] = pseudo_index
        #         pseudo[m.group(1).strip()]["pseudopot"] = m.group(3).strip()
        #         pseudo_index += 1
        # elif mode[0] == "kpoints":
        #     m = re.match(r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)', line)
        #     if m:
        #         kpoints_grid = (int(m.group(1)), int(m.group(2)), int(m.group(3)))
        #         kpoints_shift = (int(m.group(4)), int(m.group(5)), int(m.group(6)))
        #     else:
        #         kpoints_mode = mode[1]
        elif mode[0] == "structure":
            # m_l: a line starting with three numbers (lattice row);
            # m_p: a name followed by three numbers (atomic position).
            m_l = re.match(r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            m_p = re.match(r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            if m_l:
                # Lattice values are accumulated as one flat list
                lattice += [ float(m_l.group(1)), float(m_l.group(2)), float(m_l.group(3)) ]
            elif m_p:
                site_properties["pseudo"].append(pseudo[m_p.group(1)]["pseudopot"])
                species += [pseudo[m_p.group(1)]["pseudopot"].split(".")[0]]
                coords += [[float(m_p.group(2)), float(m_p.group(3)), float(m_p.group(4))]]

                for k, v in site_properties.items():
                    if k != "pseudo":
                        site_properties[k].append(sections['system'][k][pseudo[m_p.group(1)]["index"]])
            if mode[1] == "angstrom":
                coords_are_cartesian = True
            elif mode[1] == "crystal":
                coords_are_cartesian = False

    structure = Structure(Lattice(lattice), species, coords,
                          coords_are_cartesian=coords_are_cartesian,
                          site_properties=site_properties)
    return structure
def from_file(cls, filename, atom_style="full", sort_id=False):
    """
    Constructor that parses a file.

    The file is split at every line that contains one of the
    SECTION_KEYWORDS entries. The part before the first keyword is parsed
    as the header (counts, types, box bounds, tilt) and turned into a
    LammpsBox; each following part is read into a pandas DataFrame.

    Args:
        filename (str): Filename to read.
        atom_style (str): Associated atom_style. Default to "full".
        sort_id (bool): Whether sort each section by id. Default to
            False.

    Returns:
        Instance of cls built from the parsed items.

    Raises:
        ValueError: If the Atoms section does not have the number of
            columns expected for atom_style.
        NotImplementedError: If a section keyword has no parser.
        RuntimeError: If a Velocities or topology section appears before
            the Atoms section.
        AssertionError: If a section length disagrees with the header.
    """
    with open(filename) as f:
        lines = f.readlines()
    kw_pattern = r"|".join(itertools.chain(*SECTION_KEYWORDS.values()))
    section_marks = [i for i, line in enumerate(lines)
                     if re.search(kw_pattern, line)]
    parts = np.split(lines, section_marks)

    float_group = r"([0-9eE.+-]+)"
    header_pattern = dict()
    header_pattern["counts"] = r"^\s*(\d+)\s+([a-zA-Z]+)$"
    header_pattern["types"] = r"^\s*(\d+)\s+([a-zA-Z]+)\s+types$"
    # "\3" forces the same axis letter in "<axis>lo <axis>hi"
    header_pattern["bounds"] = r"^\s*{}$".format(r"\s+".join(
        [float_group] * 2 + [r"([xyz])lo \3hi"]))
    header_pattern["tilt"] = r"^\s*{}$".format(r"\s+".join(
        [float_group] * 3 + ["xy xz yz"]))

    header = {"counts": {}, "types": {}}
    bounds = {}
    for line in clean_lines(parts[0][1:]):  # skip the 1st line
        # The first pattern that matches decides what the line describes;
        # lines matching none of them are ignored.
        for kind, pattern in header_pattern.items():
            match = re.match(pattern, line)
            if match:
                break
        else:
            continue
        if kind in ("counts", "types"):
            header[kind][match.group(2)] = int(match.group(1))
        elif kind == "bounds":
            g = match.groups()
            bounds[g[2]] = [float(i) for i in g[:2]]
        elif kind == "tilt":
            header["tilt"] = [float(i) for i in match.groups()]
    # Axes without a bounds line fall back to [-0.5, 0.5]
    header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]
    box = LammpsBox(header["bounds"], header.get("tilt"))

    def parse_section(sec_lines):
        # The keyword is the title line without its trailing "#" comment
        title_info = sec_lines[0].split("#", 1)
        kw = title_info[0].strip()
        sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True and parses identically.
        df = pd.read_csv(sio, header=None, comment="#", sep=r"\s+")
        if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
            names = ["id"] + ["coeff%d" % i
                              for i in range(1, df.shape[1])]
        elif kw == "PairIJ Coeffs":
            names = ["id1", "id2"] + ["coeff%d" % i
                                      for i in range(1, df.shape[1] - 1)]
            df.index.name = None
        elif kw in SECTION_HEADERS:
            names = ["id"] + SECTION_HEADERS[kw]
        elif kw == "Atoms":
            names = ["id"] + ATOMS_HEADERS[atom_style]
            if df.shape[1] == len(names):
                pass
            elif df.shape[1] == len(names) + 3:
                # Three extra columns are taken as the image flags
                names += ["nx", "ny", "nz"]
            else:
                raise ValueError("Format in Atoms section inconsistent"
                                 " with atom_style %s" % atom_style)
        else:
            raise NotImplementedError("Parser for %s section"
                                      " not implemented" % kw)
        df.columns = names
        if sort_id:
            sort_by = "id" if kw != "PairIJ Coeffs" else ["id1", "id2"]
            df.sort_values(sort_by, inplace=True)
        if "id" in df.columns:
            df.set_index("id", drop=True, inplace=True)
            df.index.name = None
        return kw, df

    err_msg = "Bad LAMMPS data format where "
    body = {}
    seen_atoms = False
    for part in parts[1:]:
        name, section = parse_section(part)
        if name == "Atoms":
            seen_atoms = True
        if name in ["Velocities"] + SECTION_KEYWORDS["topology"] and \
                not seen_atoms:  # Atoms must appear earlier than these
            raise RuntimeError(err_msg + "%s section appears before"
                                         " Atoms section" % name)
        body[name] = section

    # Cross-check section lengths against the header counts
    err_msg += "Nos. of {} do not match between header and {} section"
    assert len(body["Masses"]) == header["types"]["atom"], \
        err_msg.format("atom types", "Masses")
    atom_sections = ["Atoms", "Velocities"] \
        if "Velocities" in body else ["Atoms"]
    for s in atom_sections:
        assert len(body[s]) == header["counts"]["atoms"], \
            err_msg.format("atoms", s)
    for s in SECTION_KEYWORDS["topology"]:
        if header["counts"].get(s.lower(), 0) > 0:
            assert len(body[s]) == header["counts"][s.lower()], \
                err_msg.format(s.lower(), s)

    # Assemble the keyword arguments passed to cls
    items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
    items["velocities"] = body.get("Velocities")
    ff_kws = [k for k in body if k
              in SECTION_KEYWORDS["ff"] + SECTION_KEYWORDS["class2"]]
    items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
        else None
    topo_kws = [k for k in body if k in SECTION_KEYWORDS["topology"]]
    items["topology"] = {k: body[k] for k in topo_kws} \
        if topo_kws else None
    items["atom_style"] = atom_style
    items["box"] = box
    return cls(**items)
def from_string(header_str):
    """
    Reads Header string and returns Header object if header was
    generated by pymatgen.

    Args:
        header_str: pymatgen generated feff.inp header

    Returns:
        Header object when the first line of the header contains
        "pymatgen". Otherwise the structure cannot be reconstructed and
        an explanatory message string is returned instead.
    """
    lines = tuple(clean_lines(header_str.split("\n"), False))
    # An empty header has no first comment line to inspect.
    comment1 = lines[0] if lines else ""
    # A substring test also accepts a match at index 0, which the
    # former "find(...) > 0" check wrongly rejected.
    if "pymatgen" not in comment1:
        return "Header not generated by pymatgen, " \
               "cannot return header object"

    comment2 = ' '.join(lines[1].split()[2:])
    source = ' '.join(lines[2].split()[2:])

    # Lattice lengths and angles sit in tokens 2-4 of lines 6 and 7.
    basis_vec = lines[6].split()
    lengths = [float(basis_vec[i]) for i in (2, 3, 4)]
    basis_ang = lines[7].split()
    angles = [float(basis_ang[i]) for i in (2, 3, 4)]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # The natoms site lines start at line 9: token 2 is the symbol and
    # everything from token 3 on is a coordinate.
    natoms = int(lines[8].split()[2])
    atomic_symbols = []
    coords = []
    for i in range(9, 9 + natoms):
        toks = lines[i].split()
        atomic_symbols.append(toks[2])
        coords.append([float(s) for s in toks[3:]])

    struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                False, False)
    return Header(struct_fromfile, source, comment2)
def from_file(cls, filename, atom_style="full", sort_id=False):
    """
    Constructor from parsing a file.

    Args:
        filename (str): Filename to read.
        atom_style (str): Associated atom_style. Default to "full".
        sort_id (bool): Whether sort each section by id. Default to
            False.

    Returns:
        The object produced by ``cls(**items)``, where items holds the
        "masses", "atoms", "box_bounds", "box_tilt", "velocities",
        "force_field", "topology" and "atom_style" entries parsed from
        the file.

    Raises:
        RuntimeError: Velocities or a topology section appears before
            the Atoms section.
        AssertionError: Counts declared in the header disagree with
            the number of entries in the corresponding section.
    """
    with open(filename) as f:
        lines = f.readlines()
    clines = list(clean_lines(lines))
    # Build the keyword lookup once instead of once per line.
    section_kws = set(itertools.chain(*SECTION_KEYWORDS.values()))
    section_marks = [i for i, l in enumerate(clines) if l in section_kws]
    parts = np.split(clines, section_marks)

    # First, parse header
    float_group = r'([0-9eE.+-]+)'
    header_pattern = {}
    header_pattern["counts"] = r'^\s*(\d+)\s+([a-zA-Z]+)$'
    header_pattern["types"] = r'^\s*(\d+)\s+([a-zA-Z]+)\s+types$'
    header_pattern["bounds"] = r'^\s*{}$'.format(
        r'\s+'.join([float_group] * 2 + [r"([xyz])lo \3hi"]))
    header_pattern["tilt"] = r'^\s*{}$'.format(
        r'\s+'.join([float_group] * 3 + ["xy xz yz"]))

    header = {"counts": {}, "types": {}}
    bounds = {}
    for l in parts[0]:
        for k, v in header_pattern.items():
            match = re.match(v, l)
            if match:
                break
        else:
            # The line matches none of the header patterns.
            continue
        if k in ["counts", "types"]:
            header[k][match.group(2)] = int(match.group(1))
        elif k == "bounds":
            g = match.groups()
            bounds[g[2]] = [float(i) for i in g[:2]]
        elif k == "tilt":
            header["tilt"] = [float(i) for i in match.groups()]
    # Axes without an explicit bounds line fall back to [-0.5, 0.5].
    header["bounds"] = [bounds.get(i, [-0.5, 0.5]) for i in "xyz"]

    # Then, parse each section
    topo_sections = SECTION_KEYWORDS["molecule"]

    def parse_section(single_section_lines):
        """Parse one body section into (keyword, list of dicts)."""
        kw = single_section_lines[0]

        if kw in SECTION_KEYWORDS["ff"] and kw != "PairIJ Coeffs":
            def parse_line(l):
                return {"coeffs": [literal_eval(x) for x in l[1:]]}
        elif kw == "PairIJ Coeffs":
            def parse_line(l):
                return {"id1": int(l[0]), "id2": int(l[1]),
                        "coeffs": [literal_eval(x) for x in l[2:]]}
        elif kw in topo_sections:
            # Number of atom ids carried by each topology entry.
            n = {"Bonds": 2, "Angles": 3, "Dihedrals": 4, "Impropers": 4}

            def parse_line(l):
                return {"type": int(l[1]),
                        kw[:-1].lower(): [int(x)
                                          for x in l[2:n[kw] + 2]]}
        elif kw == "Atoms":
            keys = ATOMS_LINE_FORMAT[atom_style][:]
            # Infer optional image-flag columns from the first data line.
            sample_l = single_section_lines[1].split()
            if len(sample_l) == len(keys) + 1:
                pass
            elif len(sample_l) == len(keys) + 4:
                keys += ["nx", "ny", "nz"]
            else:
                warnings.warn("Atoms section format might be imcompatible"
                              " with atom_style %s." % atom_style)
            float_keys = [k for k in keys if k in ATOMS_FLOATS]

            def parse_line(l):
                return {k: float(v) if k in float_keys else int(v)
                        for (k, v) in zip(keys, l[1:len(keys) + 1])}
        elif kw == "Velocities":
            def parse_line(l):
                return {"velocity": [float(x) for x in l[1:4]]}
        elif kw == "Masses":
            def parse_line(l):
                return {"mass": float(l[1])}
        else:
            warnings.warn("%s section parser has not been implemented. "
                          "Skipping..." % kw)
            return kw, []

        section = []
        splitted_lines = [l.split() for l in single_section_lines[1:]]
        if sort_id and kw != "PairIJ Coeffs":
            splitted_lines = sorted(splitted_lines,
                                    key=lambda l: int(l[0]))
        for l in splitted_lines:
            line_data = parse_line(l)
            if kw != "PairIJ Coeffs":
                line_data["id"] = int(l[0])
            section.append(line_data)
        return kw, section

    err_msg = "Bad LAMMPS data format where "
    body = {}
    seen_atoms = False
    for part in parts[1:]:
        name, section = parse_section(part)
        if name == "Atoms":
            seen_atoms = True
        # Atoms must appear earlier than Velocities and topology.
        if name in ["Velocities"] + topo_sections and not seen_atoms:
            raise RuntimeError(err_msg + "%s section appears before"
                                         " Atoms section" % name)
        body.update({name: section})

    # Cross-check the counts declared in the header against the body.
    err_msg += "Nos. of {} do not match between header and {} section"
    assert len(body["Masses"]) == header["types"]["atom"], \
        err_msg.format("atom types", "Masses")
    atom_sections = ["Atoms", "Velocities"] \
        if body.get("Velocities") else ["Atoms"]
    for s in atom_sections:
        assert len(body[s]) == header["counts"]["atoms"], \
            err_msg.format("atoms", s)
    for s in topo_sections:
        if header["counts"].get(s.lower(), 0) > 0:
            assert len(body[s]) == header["counts"][s.lower()], \
                err_msg.format(s.lower(), s)

    items = {k.lower(): body[k] for k in ["Masses", "Atoms"]}
    items["box_bounds"] = header["bounds"]
    items["box_tilt"] = header.get("tilt")
    items["velocities"] = body.get("Velocities")
    ff_kws = [k for k in body if k in SECTION_KEYWORDS["ff"]]
    items["force_field"] = {k: body[k] for k in ff_kws} if ff_kws \
        else None
    topo_kws = [k for k in body if k in SECTION_KEYWORDS["molecule"]]
    items["topology"] = {k: body[k] for k in topo_kws} \
        if topo_kws else None
    items["atom_style"] = atom_style
    return cls(**items)
def from_string(header_str):
    """
    Reads Header string and returns Header object if header was
    generated by pymatgen.

    Args:
        header_str: pymatgen generated feff.inp header

    Returns:
        Header object when the first line of the header contains
        "pymatgen". Otherwise the structure cannot be reconstructed and
        an explanatory message string is returned instead.
    """
    lines = tuple(clean_lines(header_str.split("\n"), False))
    # An empty header has no first comment line to inspect.
    comment1 = lines[0] if lines else ""
    # A substring test also accepts a match at index 0, which the
    # former "find(...) > 0" check wrongly rejected.
    if "pymatgen" not in comment1:
        return "Header not generated by pymatgen, " \
               "cannot return header object"

    comment2 = ' '.join(lines[1].split()[2:])
    source = ' '.join(lines[2].split()[2:])

    # Lattice lengths and angles sit in tokens 2-4 of lines 6 and 7.
    basis_vec = lines[6].split()
    lengths = [float(basis_vec[i]) for i in (2, 3, 4)]
    basis_ang = lines[7].split()
    angles = [float(basis_ang[i]) for i in (2, 3, 4)]
    lattice = Lattice.from_lengths_and_angles(lengths, angles)

    # The natoms site lines start at line 9: token 2 is the symbol and
    # everything from token 3 on is a coordinate. range() replaces the
    # Python 2-only xrange() and works on both major versions.
    natoms = int(lines[8].split()[2])
    atomic_symbols = []
    coords = []
    for i in range(9, 9 + natoms):
        toks = lines[i].split()
        atomic_symbols.append(toks[2])
        coords.append([float(s) for s in toks[3:]])

    struct_fromfile = Structure(lattice, atomic_symbols, coords, False,
                                False, False)
    return Header(struct_fromfile, source, comment2)
def from_string(string):
    """
    Reads an PWInput object from a string.

    Args:
        string (str): PWInput string

    Returns:
        PWInput object

    Raises:
        ValueError: An atomic position line is read under a card whose
            units are neither "angstrom" nor "crystal".
    """
    lines = list(clean_lines(string.splitlines()))

    def input_mode(line):
        """Return the parsing mode that applies to this line."""
        if line[0] == "&":
            return ("sections", line[1:].lower())
        elif "ATOMIC_SPECIES" in line:
            return ("pseudo", )
        elif "K_POINTS" in line:
            return ("kpoints", line.split("{")[1][:-1])
        elif "CELL_PARAMETERS" in line or "ATOMIC_POSITIONS" in line:
            return ("structure", line.split("{")[1][:-1])
        elif line == "/":
            return None
        else:
            # Any other line continues the mode of the previous line
            # (read from the enclosing scope).
            return mode

    sections = {"control": {}, "system": {}, "electrons": {}, "ions": {},
                "cell": {}}
    pseudo = {}
    pseudo_index = 0
    lattice = []
    species = []
    coords = []
    site_properties = {"pseudo": []}
    # Only the k-point settings actually present in the input are
    # forwarded, so a missing or partial K_POINTS card no longer ends
    # in an unbound local at the return statement.
    # NOTE(review): relies on PWInput providing defaults for the
    # kpoints_* arguments - confirm against the constructor.
    kpoints_kwargs = {}
    coords_are_cartesian = False
    mode = None
    for line in lines:
        mode = input_mode(line)
        if mode is None:
            pass
        elif mode[0] == "sections":
            section = mode[1]
            m = re.match(r'(\w+)\(?(\d*?)\)?\s*=\s*(.*)', line)
            if m:
                key = m.group(1).strip()
                key_ = m.group(2).strip()
                val = m.group(3).strip()
                if key_ != "":
                    # Indexed flag "key(i) = val": store it in slot i-1
                    # of a per-key list.
                    if sections[section].get(key) is None:
                        sections[section][key] = [0.0] * 20  # MAX NTYP DEFINITION
                        site_properties[key] = []
                    sections[section][key][int(key_) - 1] = \
                        PWInput.proc_val(key, val)
                else:
                    sections[section][key] = PWInput.proc_val(key, val)
        elif mode[0] == "pseudo":
            m = re.match(r'(\w+)\s+(\d*.\d*)\s+(.*)', line)
            if m:
                pseudo[m.group(1).strip()] = {
                    "index": pseudo_index,
                    "pseudopot": m.group(3).strip(),
                }
                pseudo_index += 1
        elif mode[0] == "kpoints":
            m = re.match(
                r'(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)', line)
            if m:
                kpoints_kwargs["kpoints_grid"] = (
                    int(m.group(1)), int(m.group(2)), int(m.group(3)))
                kpoints_kwargs["kpoints_shift"] = (
                    int(m.group(4)), int(m.group(5)), int(m.group(6)))
            else:
                kpoints_kwargs["kpoints_mode"] = mode[1]
        elif mode[0] == "structure":
            m_l = re.match(
                r'(-?\d+\.?\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)', line)
            m_p = re.match(
                r'(\w+)\s+(-?\d+\.\d*)\s+(-?\d+\.?\d*)\s+(-?\d+\.?\d*)',
                line)
            if m_l:
                lattice += [float(m_l.group(1)), float(m_l.group(2)),
                            float(m_l.group(3))]
            elif m_p:
                # The coordinate convention is taken from the card that
                # holds this atomic position line, so the units of a
                # lattice card cannot override it.
                if mode[1] == "angstrom":
                    coords_are_cartesian = True
                elif mode[1] == "crystal":
                    coords_are_cartesian = False
                else:
                    raise ValueError(
                        "Unsupported units for atomic positions: %s"
                        % mode[1])
                entry = pseudo[m_p.group(1)]
                site_properties["pseudo"].append(entry["pseudopot"])
                species += [entry["pseudopot"].split(".")[0]]
                coords += [[float(m_p.group(2)), float(m_p.group(3)),
                            float(m_p.group(4))]]
                # Indexed flags become per-site properties, looked up
                # in the "system" section by the species index.
                for k in site_properties:
                    if k != "pseudo":
                        site_properties[k].append(
                            sections['system'][k][entry["index"]])

    structure = Structure(Lattice(lattice), species, coords,
                          coords_are_cartesian=coords_are_cartesian,
                          site_properties=site_properties)
    return PWInput(structure=structure, control=sections["control"],
                   system=sections["system"],
                   electrons=sections["electrons"],
                   ions=sections["ions"], cell=sections["cell"],
                   **kpoints_kwargs)
def from_string(data, default_names=None):
    """
    Reads a Poscar from a string.

    The code will try its best to determine the elements in the POSCAR in
    the following order:

    1. If default_names are supplied and valid, it will use those. Usually,
       default names comes from an external source, such as a POTCAR in
       the same directory.
    2. If there are no valid default names but the input file is
       Vasp5-like and contains element symbols in the 6th line, the code
       will use that.
    3. Failing (2), the code will check if a symbol is provided at the end
       of each coordinate.

    If all else fails, the code will just assign the first n elements in
    increasing atomic number, where n is the number of species, to the
    Poscar. For example, H, He, Li, .... This will ensure at least a
    unique element is assigned to each site and any analysis that does not
    require specific elemental properties should work fine.

    Args:
        data: string containing Poscar data.
        default_names: default symbols for the POSCAR file, usually coming
            from a POTCAR in the same directory.

    Returns:
        Poscar object.
    """
    # Blank lines separate the positions block from the optional
    # velocities and predictor-corrector blocks.
    chunks = re.split(r"^\s*$", data.strip(), flags=re.MULTILINE)

    # Parse positions
    lines = tuple(clean_lines(chunks[0].split("\n"), False))
    comment = lines[0]
    scale = float(lines[1])
    # Explicit lists (not map objects) so the array is numeric on both
    # Python 2 and Python 3.
    lattice = np.array([[float(tok) for tok in line.split()]
                        for line in lines[2:5]])
    if scale < 0:
        # In vasp, a negative scale factor is treated as a volume. We need
        # to translate this to a proper lattice vector scaling.
        vol = abs(det(lattice))
        # 1.0 / 3 keeps the exponent a true cube root even under
        # Python 2 integer division.
        lattice *= (-scale / vol) ** (1.0 / 3)
    else:
        lattice *= scale

    vasp5_symbols = False
    try:
        # Built eagerly so a non-integer token raises ValueError here.
        natoms = [int(tok) for tok in lines[5].split()]
        ipos = 6
    except ValueError:
        # Line 6 holds element symbols, so the counts are on line 7.
        vasp5_symbols = True
        symbols = lines[5].split()
        natoms = [int(tok) for tok in lines[6].split()]
        atomic_symbols = []
        for i, count in enumerate(natoms):
            atomic_symbols.extend([symbols[i]] * count)
        ipos = 7

    postype = lines[ipos].split()[0]

    sdynamics = False
    # Selective dynamics
    if postype[0] in "sS":
        sdynamics = True
        ipos += 1
        postype = lines[ipos].split()[0]

    cart = postype[0] in "cCkK"
    nsites = sum(natoms)

    # If default_names is specified (usually coming from a POTCAR), use
    # them. This is in line with Vasp's parsing order that the POTCAR
    # specified is the default used.
    if default_names:
        try:
            # Build into a scratch list so a too-short default_names
            # cannot leave atomic_symbols half-overwritten.
            named_symbols = []
            for i, count in enumerate(natoms):
                named_symbols.extend([default_names[i]] * count)
        except IndexError:
            pass
        else:
            atomic_symbols = named_symbols
            vasp5_symbols = True

    if not vasp5_symbols:
        ind = 3 if not sdynamics else 6
        try:
            # Check if names are appended at the end of the coordinates.
            atomic_symbols = [l.split()[ind]
                              for l in lines[ipos + 1:ipos + 1 + nsites]]
            # Ensure symbols are valid elements
            if not all([Element.is_valid_symbol(sym)
                        for sym in atomic_symbols]):
                raise ValueError("Non-valid symbols detected.")
            vasp5_symbols = True
        except (ValueError, IndexError):
            # Defaulting to false names.
            atomic_symbols = []
            for i, count in enumerate(natoms):
                sym = Element.from_Z(i + 1).symbol
                atomic_symbols.extend([sym] * count)
            warnings.warn("Elements in POSCAR cannot be determined. "
                          "Defaulting to false names {}."
                          .format(" ".join(atomic_symbols)))

    # Read the atomic coordinates
    coords = []
    selective_dynamics = list() if sdynamics else None
    for i in range(nsites):
        toks = lines[ipos + 1 + i].split()
        coords.append([float(tok) for tok in toks[:3]])
        if sdynamics:
            selective_dynamics.append([tok.upper()[0] == "T"
                                       for tok in toks[3:6]])

    struct = Structure(lattice, atomic_symbols, coords, False, False, cart)

    # Parse velocities if any
    velocities = []
    if len(chunks) > 1:
        for line in chunks[1].strip().split("\n"):
            velocities.append([float(tok) for tok in line.split()])

    # Parse predictor-corrector data if any: the first line is a single
    # integer, the remaining lines are rows of floats.
    predictor_corrector = []
    if len(chunks) > 2:
        lines = chunks[2].strip().split("\n")
        predictor_corrector.append([int(lines[0])])
        for line in lines[1:]:
            predictor_corrector.append([float(tok)
                                        for tok in line.split()])

    return Poscar(struct, comment, selective_dynamics, vasp5_symbols,
                  velocities=velocities,
                  predictor_corrector=predictor_corrector)