def parse_band_gaps(lines, initial_lineno):
    """Collect the band gap values reported from a given line onwards.

    Note: this is new for CRYSTAL17

    Every line from ``initial_lineno`` to the end of ``lines`` that contains
    ``BAND GAP`` is compared against the alpha, beta and spin-independent
    formats (each ending in ``eV``). The first number on a recognised line is
    stored under the key ``"alpha"``, ``"beta"`` or ``"all"`` respectively.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int

    Returns
    -------
    ParsedSection
        built from ``initial_lineno`` and the band gap dict; an error message
        is added as third argument when a ``BAND GAP`` line has an unknown
        format, or when the same type of gap is found twice

    """
    # tried in order: the first pattern that matches decides the gap type
    known_formats = (
        ("ALPHA BAND GAP:*eV", "alpha"),
        ("BETA BAND GAP:*eV", "beta"),
        ("BAND GAP:*eV", "all"),
    )
    found = {}

    # TODO breaking line?
    # TODO use regex:
    # re.compile(r"(DIRECT|INDIRECT) ENERGY BAND GAP:\s*([.\d]*)",
    #            re.DOTALL),
    for offset, raw_line in enumerate(lines[initial_lineno:]):
        text = raw_line.strip()
        if "BAND GAP" not in text:
            continue
        lineno = initial_lineno + offset

        gap_type = None
        for pattern, candidate in known_formats:
            if fnmatch(text, pattern):
                gap_type = candidate
                break

        if gap_type is None:
            return ParsedSection(
                initial_lineno,
                found,
                "found a band gap of unknown format at line {0}: {1}".format(
                    lineno, text),
            )

        gap_value = split_numbers(text)[0]

        if gap_type in found:
            return ParsedSection(
                initial_lineno,
                found,
                "band gap data already contains {0} value before line {1}: {2}"
                .format(gap_type, lineno, text),
            )
        found[gap_type] = gap_value

    return ParsedSection(initial_lineno, found)
def parse_symmetry_section(data, initial_lineno, line, lines):
    """Read the symmetry operations announced by the current line, if any.

    Nothing happens unless ``line`` is the ``SYMMOPS - TRANSLATORS IN
    FRACTIONAL UNITS`` header. In that case the two following header lines
    are validated, one line is read per symmetry operation, and the list of
    operations is stored in ``data["primitive_symmops"]``.

    Parameters
    ----------
    data: dict
        existing data to add the symmetry data to (modified in place)
    initial_lineno: int
        index of ``line`` within ``lines``
    line: str
        the current line
    lines: list[str]

    Raises
    ------
    IOError
        if the header does not contain exactly one number, if either of the
        two following header lines is not as expected, or if an operation
        line does not contain 14 numbers

    """
    if not fnmatch(line, "*SYMMOPS - TRANSLATORS IN FRACTIONAL UNITS*"):
        return

    counts = split_numbers(line)
    if len(counts) != 1:
        raise IOError(
            "was expecting a single number, representing the number of symmops, on this line:"
            " {0}, got: {1}".format(initial_lineno, line))
    nsymmops = int(counts[0])

    frame_lineno = initial_lineno + 1
    if not fnmatch(
            lines[frame_lineno],
            "*MATRICES AND TRANSLATORS IN THE CRYSTALLOGRAPHIC REFERENCE FRAME*",
    ):
        raise IOError(
            "was expecting CRYSTALLOGRAPHIC REFERENCE FRAME on this line"
            " {0}, got: {1}".format(frame_lineno, lines[frame_lineno].strip()))

    header_lineno = initial_lineno + 2
    if not fnmatch(lines[header_lineno],
                   "*V*INV*ROTATION MATRICES*TRANSLATORS*"):
        raise IOError("was expecting symmetry headers on this line"
                      " {0}, got: {1}".format(header_lineno,
                                              lines[header_lineno].strip()))

    first_op_lineno = initial_lineno + 3
    operations = []
    for op_lineno in range(first_op_lineno, first_op_lineno + nsymmops):
        numbers = split_numbers(lines[op_lineno])
        if len(numbers) != 14:
            raise IOError(
                "was expecting 14 values for symmetry data on this line"
                " {0}, got: {1}".format(op_lineno, lines[op_lineno].strip()))
        # the first two numbers of each row are dropped; the remaining twelve are kept
        operations.append(numbers[2:14])

    data["primitive_symmops"] = operations
def parse_crystal_ppan(content):
    """Parse CRYSTAL Mulliken Population outputs (PPAN.DAT)

    Parameters
    ----------
    content: str

    Returns
    -------
    dict
        one entry per spin block (``"alpha+beta_electrons"`` and, when a
        second block is present, ``"alpha-beta_electrons"``), each holding a
        list of per-atom dicts under ``"atoms"`` and the sum of the atoms'
        total charges under ``"summed_charge"``

    Notes
    -----
    Format:

    ::

        NSPIN,NATOM
        IAT,NSHELL
        Xiat,Yiat,Ziat (AU)
        QTOT shell charges
        NORB
        orbital charges

    """
    spin_names = ["alpha+beta_electrons", "alpha-beta_electrons"]
    remaining = content.splitlines()

    def next_numbers():
        # fetch the next line via the module helper and split it into numbers
        return split_numbers(_new_line(remaining))

    def fill_up_to(values, count):
        # values can wrap over several lines: keep reading until enough are found
        while len(values) < count:
            values.extend(next_numbers())
        return values

    result = {}
    nspin, natoms = next_numbers()

    for spin_index in range(int(nspin)):
        block = result.setdefault(spin_names[spin_index], {"atoms": []})

        for _ in range(int(natoms)):
            atomic_number, nshell = next_numbers()
            coordinate = next_numbers()

            charges = next_numbers()
            total_charge = charges[0]
            shell_charges = fill_up_to(charges[1:], nshell)

            (norbitals, ) = next_numbers()
            orbital_charges = fill_up_to([], norbitals)

            block["atoms"].append({
                "atomic_number": atomic_number,
                "coordinate": coordinate,
                "total_charge": total_charge,
                "shell_charges": shell_charges,
                "orbital_charges": orbital_charges,
            })

        block["summed_charge"] = sum(
            atom["total_charge"] for atom in block["atoms"])

    return result
def initial_parse(lines):
    """Scan the file for errors, and find the final elapsed time value.

    Each line is put in at most one category, tested in this order: warning
    (``WARNING``, case-insensitive), error (``ERROR``, except the
    ``open_hca: getaddr_netdev ERROR`` message), ``MPI_Abort`` (only the first
    occurrence is recorded), unsatisfied convergence tests, ``TELAPSE``
    timing line, ``ENDPROP`` marker.

    Parameters
    ----------
    lines: list[str]

    Returns
    -------
    errors: list[str]
        stripped lines reporting errors
    warnings: list[str]
        stripped lines reporting warnings
    parser_errors: list[str]
        contains a message if no line starting with ``ENDPROP`` was found
    total_seconds: int or None
        first number after ``TELAPSE`` on the last timing line, or None if
        there is no such line
    start_lines: dict
        always empty in this function

    """
    errors = []
    warnings = []
    parser_errors = []
    mpi_abort = False
    telapse_line = None
    start_lines = {}
    found_endprop = False

    for lineno, line in enumerate(lines):
        if "WARNING" in line.upper():
            warnings.append(line.strip())
        elif "ERROR" in line:
            # TODO ignore errors before program execution (e.g. in mpiexec setup)?
            if "open_hca: getaddr_netdev ERROR" not in line:
                errors.append(line.strip())
        elif "MPI_Abort" in line:
            # only record one mpi_abort event (to not clutter output)
            if not mpi_abort:
                errors.append(line.strip())
                mpi_abort = True
        elif "CONVERGENCE TESTS UNSATISFIED" in line.upper():
            errors.append(line.strip())
        elif "TELAPSE" in line:
            # later matches overwrite earlier ones, so the last one wins
            telapse_line = lineno
        elif line.strip().startswith("ENDPROP"):
            found_endprop = True

    total_seconds = None
    # compare with None explicitly: line index 0 is a valid match but is falsy
    if telapse_line is not None:
        total_seconds = int(
            split_numbers(lines[telapse_line].split("TELAPSE")[1])[0])

    if not found_endprop:
        # TODO separate exit code?
        parser_errors.append("No ENDPROP found in stdout")

    return errors, warnings, parser_errors, total_seconds, start_lines
def parse_scf_final_energy(lines, initial_lineno, final_lineno=None):
    """Read the total corrected energy reported after the initial SCF.

    Lines are scanned from ``initial_lineno`` until ``final_lineno`` is
    reached (if given) or a line starting with ``TTTTTTT`` or ``******`` is
    met. The second number of the ``TOTAL ENERGY ... DE`` line is converted
    from hartree to eV and stored under ``"total_corrected"``.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
    final_lineno: int or None
        index at which to stop scanning

    Returns
    -------
    ParsedSection
        built from ``final_lineno`` and the energy dict; an error message is
        added when the end of ``lines`` is reached without finding the end
        of the section

    Raises
    ------
    IOError
        if the energy line does not state atomic units, or if a second
        energy line is found

    """
    energies = {}

    for lineno, raw_line in enumerate(lines[initial_lineno:],
                                      start=initial_lineno):
        if final_lineno is not None and lineno == final_lineno:
            return ParsedSection(final_lineno, energies)

        text = raw_line.strip()

        if text.startswith(("TTTTTTT", "******")):
            # NOTE(review): this returns final_lineno (None by default) rather
            # than the line the terminator was found on -- confirm callers
            # do not rely on the first field in this case
            return ParsedSection(final_lineno, energies)

        if not fnmatch(text, "TOTAL ENERGY*DE*"):
            continue

        if not fnmatch(text, "TOTAL ENERGY*AU*DE*"):
            raise IOError("was expecting units in a.u. on line:"
                          " {0}, got: {1}".format(lineno, raw_line))
        if "total_corrected" in energies:
            raise IOError("total corrected energy found twice, on line:"
                          " {0}, got: {1}".format(lineno, raw_line))

        hartree_value = split_numbers(raw_line)[1]
        energies["total_corrected"] = convert_units(hartree_value, "hartree",
                                                    "eV")

    return ParsedSection(
        final_lineno,
        energies,
        "Did not find end of Post SCF section (starting on line {})".format(
            initial_lineno),
    )
def read_gaussian_cube(handle, return_density=False, dist_units="angstrom"):
    """Parse gaussian cube files to a data structure.

    The specification can be found at:
    http://h5cube-spec.readthedocs.io/en/latest/cubeformat.html

    CRYSTAL outputs include DENSCUBE.DAT, SPINCUBE.DAT, POTCUBE.DAT.

    The file is consumed in order: two header lines, the settings record
    (number of atoms, origin and optionally NVAL), three axis records (number
    of points and voxel step), one record per atom and finally, if requested,
    the volumetric values. Lengths are read as bohr and converted to
    ``dist_units``.

    Parameters
    ----------
    handle : file-like
        an open file handle
    return_density : bool
        whether to read and return the density values
    dist_units : str
        the distance units to return

    Returns
    -------
    aiida_crystal17.parsers.raw.gaussian_cube.GcubeResult

    Raises
    ------
    NotImplementedError
        if the settings record has a fifth value other than 1 (NVAL), or a
        negative number of atoms (DSET_IDS)

    """
    in_dunits = "bohr"

    header = [handle.readline().strip() for _ in range(2)]

    settings = split_numbers(handle.readline().strip())
    if len(settings) > 4 and settings[4] != 1:
        # TODO implement NVAL != 1
        raise NotImplementedError("not yet implemented NVAL != 1")

    natoms = settings[0]
    origin = convert_units(np.array(settings[1:4]), in_dunits, dist_units)
    if natoms < 0:
        # TODO implement DSET_IDS
        raise NotImplementedError("not yet implemented DSET_IDS")

    # the three axis records: number of points, then the voxel step vector
    npoints = []
    steps = []
    for _ in range(3):
        count, step_x, step_y, step_z = split_numbers(
            handle.readline().strip())
        npoints.append(count)
        steps.append([step_x, step_y, step_z])

    voxel_cell = convert_units(np.array(steps), in_dunits, dist_units)
    # full cell vectors are the voxel steps scaled by the number of points
    cell_vectors = [
        convert_units(np.array(step) * count, in_dunits, dist_units)
        for count, step in zip(npoints, steps)
    ]
    grid_shape = [int(count) for count in npoints]

    atomic_numbers = []
    nuclear_charges = []
    ccoords = []
    for _ in range(int(natoms)):
        anum, ncharge, x, y, z = split_numbers(handle.readline().strip())
        atomic_numbers.append(int(anum))
        nuclear_charges.append(ncharge)
        # atom positions are stored relative to the grid origin
        position = convert_units(np.asarray([x, y, z]), in_dunits, dist_units)
        ccoords.append((position - origin).tolist())

    density = None
    if return_density:
        # everything left in the file is volumetric data
        tokens = []
        for row in handle:
            tokens.extend(row.split())
        density = np.array(tokens, dtype=float).reshape(tuple(grid_shape))

    return GcubeResult(
        header,
        [vector.tolist() for vector in cell_vectors],
        voxel_cell.tolist(),
        grid_shape,
        origin.tolist(),
        ccoords,
        nuclear_charges,
        atomic_numbers,
        {
            "conversion": "CODATA2014",
            "length": dist_units
        },
        density,
    )
def parse_crystal_fort25(content):
    """Parse the fort.25 output from CRYSTAL.

    Only DOSS (density of states) data sets are supported. All data sets must
    share the same system type, energy increment, Fermi energy, initial
    energy and number of DOS values. The first data set found for a
    projection number is treated as alpha, the second as beta. Within each
    spin, the data set with the highest projection number is returned as the
    total.

    Parameters
    ----------
    content: str
        the content of the fort.25 file

    Returns
    -------
    dict
        with keys ``units``, ``energy`` (list of energies in eV),
        ``system_type``, ``fermi_energy`` (eV), ``total_alpha``,
        ``total_beta``, ``projections_alpha`` and ``projections_beta``
        (None where no data is available)

    Raises
    ------
    IOError
        if a data set is not of type DOSS, if data sets are inconsistent
        with each other, or if a projection number occurs three times

    Notes
    -----
    File Format:

    ::

        1ST RECORD : -%-,IHFERM,TYPE,NROW,NCOL,DX,DY,COSXY (format : A3,I1,A4,2I5,1P,(3E12.5))
        2ND RECORD : X0,Y0 (format : 1P,6E12.5)
        3RD RECORD : I1,I2,I3,I4,I5,I6 (format : 6I3)
        4TH RECORD
        AND FOLLOWING : ((RDAT(I,J),I=1,NROW),J=1,NCOL) (format : 1P,6E12.5)

        Meaning of the variables:
        1  NROW            1 (DOSS are written one projection at a time)
           NCOL            number of energy points in which the DOS is calculated
           DX              energy increment (hartree)
           DY              not used
           COSXY           Fermi energy (hartree)
        2  X0              energy corresponding to the first point
           Y0              not used
        3  I1              number of the projection;
           I2              number of atomic orbitals of the projection;
           I3,I4,I5,I6     not used
        4  RO(J),J=1,NCOL  DOS: density of states ro(eps(j)) (atomic units).

    """
    system_type = None
    fermi_energy = None
    energy_delta = None
    initial_energy = None
    len_dos = None
    alpha_projections = {}
    beta_projections = {}
    proj_number = 0

    lines = content.splitlines()
    lineno = 0

    while lineno < len(lines):
        line = lines[lineno].strip()

        if not line.startswith("-%-"):
            lineno += 1
            continue

        # 1st record: header of a new data set
        proj_number += 1

        if system_type is None:
            system_type = line[3]
        elif system_type != line[3]:
            raise IOError(
                "projection {0} has different system type ({1}) to previous ({2})".format(
                    proj_number, line[3], system_type
                )
            )
        if line[4:8] != "DOSS":
            raise IOError("projection {0} is not of type DOSS".format(proj_number))

        _nrows, _ncols, _, denergy, fermi = split_numbers(line[8:])

        if energy_delta is None:
            energy_delta = denergy
        elif energy_delta != denergy:
            raise IOError(
                "projection {0} has different delta energy ({1}) to previous ({2})".format(
                    proj_number, denergy, energy_delta
                )
            )
        if fermi_energy is None:
            fermi_energy = fermi
        elif fermi_energy != fermi:
            raise IOError(
                "projection {0} has different fermi energy ({1}) to previous ({2})".format(
                    proj_number, fermi, fermi_energy
                )
            )

        # 2nd record
        lineno += 1
        line = lines[lineno].strip()
        # NOTE(review): the format description above lists the energy of the
        # first point as X0, but the second number of the record is what is
        # read here -- confirm against real fort.25 files
        ienergy = split_numbers(line)[1]
        if initial_energy is None:
            initial_energy = ienergy
        elif initial_energy != ienergy:
            raise IOError(
                "projection {0} has different initial energy ({1}) to previous ({2})".format(
                    proj_number, ienergy, initial_energy
                )
            )

        # 3rd record
        lineno += 1
        line = lines[lineno].strip()
        projid, norbitals, _, _, _, _ = [int(i) for i in line.split()]

        # 4th record and following: DOS values, up to the next header or the
        # end of the file; lineno is left on the next header so that the
        # outer loop picks it up
        lineno += 1
        line = lines[lineno].strip()
        dos = []
        while not line.startswith("-%-"):
            dos += split_numbers(line)
            if lineno + 1 >= len(lines):
                break
            lineno += 1
            line = lines[lineno].strip()

        if len_dos is None:
            len_dos = len(dos)
        elif len_dos != len(dos):
            raise IOError(
                "projection {0} has different dos value lengths ({1}) to previous ({2})".format(
                    proj_number, len(dos), len_dos
                )
            )

        projection = {"id": projid, "norbitals": norbitals, "dos": dos}
        if projid not in alpha_projections:
            alpha_projections[projid] = projection
        elif projid in beta_projections:
            raise IOError(
                "three data sets with same projid ({0}) were found".format(projid)
            )
        else:
            beta_projections[projid] = projection

    system_type = IHFERM_MAP[int(system_type)]
    fermi_energy = convert_units(float(fermi_energy), "hartree", "eV")
    energy_delta = convert_units(float(energy_delta), "hartree", "eV")
    initial_energy = convert_units(float(initial_energy), "hartree", "eV")
    len_dos = int(len_dos)

    # len_dos points separated by energy_delta span (len_dos - 1) increments,
    # so the last point lies at initial_energy + (len_dos - 1) * energy_delta
    energies = np.linspace(
        initial_energy, initial_energy + (len_dos - 1) * energy_delta, len_dos
    ).tolist()

    # the data set with the highest projection number is taken as the total
    total_alpha = None
    total_beta = None
    if alpha_projections:
        total_alpha = alpha_projections.pop(max(alpha_projections.keys()))
    if beta_projections:
        total_beta = beta_projections.pop(max(beta_projections.keys()))

    return {
        "units": {"conversion": "CODATA2014", "energy": "eV"},
        "energy": energies,
        "system_type": system_type,
        "fermi_energy": fermi_energy,
        "total_alpha": total_alpha,
        "total_beta": total_beta,
        "projections_alpha": list(alpha_projections.values())
        if alpha_projections
        else None,
        "projections_beta": list(beta_projections.values())
        if beta_projections
        else None,
    }
def _read_numeric_rows(lines, start):
    """Collect the numbers of consecutive purely numeric lines, from ``start``.

    Reading stops at the first line containing a token that is not a number,
    or at the end of ``lines``.
    """
    values = []
    lineno = start
    while lineno < len(lines):
        numbers = split_numbers(lines[lineno])
        if len(lines[lineno].strip().split()) != len(numbers):
            break
        values += numbers
        lineno += 1
    return values


def parse_scf_section(lines, initial_lineno, final_lineno=None):
    """Read the data of each SCF cycle.

    A new cycle starts at every line beginning with ``CYC``. The ``ETOT``
    value on that line is stored (in eV) as the total energy of the previous
    cycle. Within a cycle the charge normalization factor, the summed spin
    density and the per-atom charges and spins are recorded when present.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
    final_lineno: int or None
        index at which to stop, if ``SCF ENDED`` is not met before

    Returns
    -------
    ParsedSection
        built from the last line read and the list of cycles. When the
        section ends on a line without ``CONVERGE`` that line is passed as
        fourth argument; a parser error is set when the cycle numbers are not
        sequential or when the end of the section is not found

    Raises
    ------
    IOError
        if the energy on a cycle line is not stated in atomic units

    """
    scf = []
    scf_cyc = None
    last_cyc_num = None
    # defined before the loop so the final return also works when there are
    # no lines to scan
    curr_lineno = initial_lineno

    for curr_lineno, line in enumerate(lines[initial_lineno:],
                                       start=initial_lineno):

        if "SCF ENDED" in line or (final_lineno is not None
                                   and curr_lineno == final_lineno):
            # add last scf cycle
            if scf_cyc:
                scf.append(scf_cyc)
            if "CONVERGE" not in line:
                return ParsedSection(curr_lineno, scf, None, line.strip())
            return ParsedSection(curr_lineno, scf, None)

        line = line.strip()

        if fnmatch(line, "CYC*"):

            # start new cycle
            if scf_cyc is not None:
                scf.append(scf_cyc)
            scf_cyc = {}

            # check we are adding them in sequential order
            cur_cyc_num = split_numbers(line)[0]
            if last_cyc_num is not None and cur_cyc_num != last_cyc_num + 1:
                return ParsedSection(
                    curr_lineno,
                    scf,
                    "was expecting the SCF cyle number to be {0} in line {1}: {2}"
                    .format(int(last_cyc_num + 1), curr_lineno, line),
                )
            last_cyc_num = cur_cyc_num

            if fnmatch(line, "*ETOT*"):
                if not fnmatch(line, "*ETOT(AU)*"):
                    raise IOError("was expecting units in a.u. on line {0}, "
                                  "got: {1}".format(curr_lineno, line))
                # this is the initial energy of the configuration and so actually the energy of the previous run
                if scf:
                    energy = scf[-1].setdefault("energy", {})
                    energy["total"] = convert_units(
                        split_numbers(line)[1], "hartree", "eV")

        elif scf_cyc is None:
            # nothing to record until the first cycle has started
            continue

        # The total magnetization is the integral of the magnetization in the cell:
        #     MT=∫ (nup-ndown) d3 r
        #
        # The absolute magnetization is the integral of the absolute value of the magnetization in the cell:
        #     MA=∫ |nup-ndown| d3 r
        #
        # In a simple ferromagnetic material they should be equal (except possibly for an overall sign).
        # In simple antiferromagnets (like FeO) MT is zero and MA is twice the magnetization of each of the two atoms.

        if line.startswith("CHARGE NORMALIZATION FACTOR"):
            scf_cyc["charge_normalization_factor"] = split_numbers(line)[0]
        if line.startswith("SUMMED SPIN DENSITY"):
            scf_cyc["spin_density_total"] = split_numbers(line)[0]

        if line.startswith("TOTAL ATOMIC CHARGES"):
            # the values follow on the next lines and can wrap over several of them
            scf_cyc["atomic_charges_peratom"] = _read_numeric_rows(
                lines, curr_lineno + 1)
        if line.startswith("TOTAL ATOMIC SPINS"):
            spins = _read_numeric_rows(lines, curr_lineno + 1)
            scf_cyc["spin_density_peratom"] = spins
            # sum over every atom, not only those listed on the first row
            scf_cyc["spin_density_absolute"] = sum(abs(s) for s in spins)

    # add last scf cycle
    if scf_cyc:
        scf.append(scf_cyc)

    return ParsedSection(
        curr_lineno,
        scf,
        "Did not find end of SCF section (starting on line {})".format(
            initial_lineno),
    )
def parse_geometry_section(data, initial_lineno, line, lines):
    """Parse a section of geometry related variables.

    The function is called with a single line of the output; depending on
    which section header (if any) the line matches, the lines that follow it
    are read and the result is merged into ``data`` (in place). Lines that
    match no header leave ``data`` untouched.

    Parameters
    ----------
    data: dict
        existing data to add the geometry data to
    initial_lineno: int
        index of ``line`` within ``lines``
    line: str
        the current line
    lines: list[str]

    Raises
    ------
    IOError
        if the lines following a recognised header do not have the expected
        format or units

    Notes
    -----

    For initial and 'FINAL OPTIMIZED GEOMETRY' only::

        DIRECT LATTICE VECTORS CARTESIAN COMPONENTS (ANGSTROM)
                X                    Y                    Z
        0.355114561000E+01   0.000000000000E+00   0.000000000000E+00
        0.000000000000E+00   0.355114561000E+01   0.000000000000E+00
        0.000000000000E+00   0.000000000000E+00   0.535521437000E+01


        CARTESIAN COORDINATES - PRIMITIVE CELL
        *******************************************************************************
        *      ATOM          X(ANGSTROM)         Y(ANGSTROM)         Z(ANGSTROM)
        *******************************************************************************
            1    26 FE    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00
            2    26 FE    1.775572805000E+00  1.775572805000E+00  0.000000000000E+00
            3    16 S    -1.110223024625E-16  1.775572805000E+00  1.393426779074E+00
            4    16 S     1.775572805000E+00  7.885127240037E-16 -1.393426779074E+00

    For initial, final and optimisation steps:

    Primitive cell::

        PRIMITIVE CELL - CENTRING CODE 1/0 VOLUME=    36.099581 - DENSITY  6.801 g/cm^3
                A              B              C           ALPHA      BETA       GAMMA
            2.94439264     2.94439264     4.16400000    90.000000  90.000000  90.000000
        *******************************************************************************
        ATOMS IN THE ASYMMETRIC UNIT    4 - ATOMS IN THE UNIT CELL:    4
            ATOM              X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  28 NI    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    Crystallographic cell (only if the geometry is not originally primitive)::

        CRYSTALLOGRAPHIC CELL (VOLUME=         74.61846100)
                A              B              C           ALPHA      BETA       GAMMA
            4.21000000     4.21000000     4.21000000    90.000000  90.000000  90.000000

        COORDINATES IN THE CRYSTALLOGRAPHIC CELL
            ATOM              X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  12 MG    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    """
    # check that units are correct (probably not needed)
    if fnmatch(line, "LATTICE PARAMETERS*(*)"):
        if not ("ANGSTROM" in line and "DEGREES" in line):
            raise IOError(
                "was expecting lattice parameters in angstroms and degrees on line:"
                " {0}, got: {1}".format(initial_lineno, line))
        return

    # the primitive and crystallographic cells share the same layout:
    # `pattern` marks the cell parameter header, `pattern2` the header of the
    # atom table, and `field` is the key of `data` the results are stored under
    for pattern, field, pattern2 in [
        ("PRIMITIVE*CELL*", "primitive_cell", "ATOMS IN THE ASYMMETRIC UNIT*"),
        (
            "CRYSTALLOGRAPHIC*CELL*",
            "crystallographic_cell",
            "COORDINATES IN THE CRYSTALLOGRAPHIC CELL",
        ),
    ]:
        if fnmatch(line, pattern):
            # cell parameters: a header line followed by the six values
            if not fnmatch(lines[initial_lineno + 1].strip(),
                           "A*B*C*ALPHA*BETA*GAMMA"):
                raise IOError("was expecting A B C ALPHA BETA GAMMA on line:"
                              " {0}, got: {1}".format(
                                  initial_lineno + 1,
                                  lines[initial_lineno + 1]))
            data[field] = edict.merge([
                data.get(field, {}),
                {
                    "cell_parameters":
                    dict(
                        zip(
                            ["a", "b", "c", "alpha", "beta", "gamma"],
                            split_numbers(lines[initial_lineno + 2]),
                        ))
                },
            ])
        elif fnmatch(line, pattern2):
            # atom table: the column header tells which directions are
            # fractional (periodic) and which are given in angstrom
            periodic = [True, True, True]
            if not fnmatch(lines[initial_lineno + 1].strip(),
                           "ATOM*X/A*Y/B*Z/C"):
                # for 2d (slab) can get z in angstrom (and similar for 1d)
                if fnmatch(lines[initial_lineno + 1].strip(),
                           "ATOM*X/A*Y/B*Z(ANGSTROM)*"):
                    periodic = [True, True, False]
                elif fnmatch(
                        lines[initial_lineno + 1].strip(),
                        "ATOM*X/A*Y(ANGSTROM)*Z(ANGSTROM)*",
                ):
                    periodic = [True, False, False]
                elif fnmatch(
                        lines[initial_lineno + 1].strip(),
                        "ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)*",
                ):
                    periodic = [False, False, False]
                    # fixed cell parameters are stored for the fully
                    # non-periodic case
                    # NOTE(review): presumably 500 is a placeholder length for
                    # an isolated system -- confirm
                    cell_params = dict(
                        zip(
                            ["a", "b", "c", "alpha", "beta", "gamma"],
                            [500.0, 500.0, 500.0, 90.0, 90.0, 90.0],
                        ))
                    data[field] = edict.merge([
                        data.get(field, {}), {
                            "cell_parameters": cell_params
                        }
                    ])
                else:
                    raise IOError(
                        "was expecting ATOM X Y Z (in units of ANGSTROM or fractional) on line:"
                        " {0}, got: {1}".format(initial_lineno + 1,
                                                lines[initial_lineno + 1]))
            if not all(periodic) and "cell_parameters" not in data.get(
                    field, {}):
                raise IOError(
                    "require cell parameters to have been set for non-periodic directions in line"
                    " #{0} : {1}".format(initial_lineno + 1,
                                         lines[initial_lineno + 1]))
            a, b, c, alpha, beta, gamma = [None] * 6
            if not all(periodic):
                cell = data[field]["cell_parameters"]
                a, b, c, alpha, beta, gamma = [
                    cell[p] for p in ["a", "b", "c", "alpha", "beta", "gamma"]
                ]

            # the atom rows start three lines after the table header and run
            # until an empty line or a line starting with a letter
            curr_lineno = initial_lineno + 3
            atom_data = {
                "ids": [],
                "assymetric": [],
                "atomic_numbers": [],
                "symbols": [],
            }
            atom_data["pbc"] = periodic
            while (lines[curr_lineno].strip()
                   and not lines[curr_lineno].strip()[0].isalpha()):
                fields = lines[curr_lineno].strip().split()
                atom_data["ids"].append(fields[0])
                atom_data["assymetric"].append(bool(strtobool(fields[1])))
                atom_data["atomic_numbers"].append(int(fields[2]))
                atom_data["symbols"].append(fields[3].lower().capitalize())
                if all(periodic):
                    atom_data.setdefault("fcoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])
                elif periodic == [True, True, False
                                  ] and alpha == 90 and beta == 90:
                    # the z value is divided by c to get a fractional
                    # coordinate; only done when alpha and beta are 90
                    atom_data.setdefault("fcoords", []).append([
                        float(fields[4]),
                        float(fields[5]),
                        float(fields[6]) / c
                    ])
                elif periodic == [False, False, False]:
                    atom_data.setdefault("ccoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])
                # TODO other periodic types (1D)
                # (for any other combination no coordinates are stored)
                curr_lineno += 1
            data[field] = edict.merge([data.get(field, {}), atom_data])

    # TODO These coordinates are present in initial and final optimized sections,
    # but DON'T work with lattice parameters
    if fnmatch(line, "CARTESIAN COORDINATES - PRIMITIVE CELL*"):
        if not fnmatch(
                lines[initial_lineno + 2].strip(),
                "*ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)",
        ):
            raise IOError(
                "was expecting ATOM X(ANGSTROM) Y(ANGSTROM) Z(ANGSTROM) on line:"
                " {0}, got: {1}".format(initial_lineno + 2,
                                        lines[initial_lineno + 2]))

        # the atom rows start four lines after the header and run until an
        # empty line or a line starting with a letter
        curr_lineno = initial_lineno + 4
        atom_data = {
            "ids": [],
            "atomic_numbers": [],
            "symbols": [],
            "ccoords": []
        }
        while (lines[curr_lineno].strip()
               and not lines[curr_lineno].strip()[0].isalpha()):
            fields = lines[curr_lineno].strip().split()
            if len(fields) < 6:
                raise IOError("was expecting ID ANUM SYMBOL X Y Z on line:"
                              " {0}, got: {1}".format(curr_lineno,
                                                      lines[curr_lineno]))
            atom_data["ids"].append(fields[0])
            atom_data["atomic_numbers"].append(int(fields[1]))
            atom_data["symbols"].append(fields[2].lower().capitalize())
            atom_data["ccoords"].append(
                [float(fields[3]),
                 float(fields[4]),
                 float(fields[5])])
            curr_lineno += 1
        data["primitive_cell"] = edict.merge(
            [data.get("primitive_cell", {}), atom_data])

    elif fnmatch(line, "DIRECT LATTICE VECTORS CARTESIAN COMPONENTS*"):
        if "ANGSTROM" not in line:
            raise IOError("was expecting lattice vectors in angstroms on line:"
                          " {0}, got: {1}".format(initial_lineno, line))
        if not fnmatch(lines[initial_lineno + 1].strip(), "X*Y*Z"):
            raise IOError("was expecting X Y Z on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        # NOTE(review): the duplicate check below looks at (and creates)
        # data["crystallographic_cell"], while the vectors are stored under
        # data["primitive_cell"], which must already exist -- confirm which
        # key is intended
        if "crystallographic_cell" not in data:
            data["crystallographic_cell"] = {}
        if "cell_vectors" in data["crystallographic_cell"]:
            raise IOError("found multiple cell vectors on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        vectors = {
            "a": split_numbers(lines[initial_lineno + 2]),
            "b": split_numbers(lines[initial_lineno + 3]),
            "c": split_numbers(lines[initial_lineno + 4]),
        }

        data["primitive_cell"]["cell_vectors"] = vectors
def initial_parse(lines):
    """Scan the file for errors, and find the final elapsed time value.

    Each line is put in at most one category, tested in this order: warning
    (``WARNING``, case-insensitive), error (``ERROR``, except the
    ``open_hca: getaddr_netdev ERROR`` message), ``SCF abnormal end``,
    ``MPI_Abort`` (only the first occurrence is recorded), unsatisfied
    convergence tests, ``TELAPSE`` timing line, and finally the markers for
    the start of the optimisation, Mulliken analysis and final geometry
    sections.

    Parameters
    ----------
    lines: list[str]

    Returns
    -------
    errors: list[str]
        stripped lines reporting errors
    warnings: list[str]
        stripped lines reporting warnings
    parser_errors: list[str]
        messages for section markers that were found more often than expected
    total_seconds: int or None
        first number after ``TELAPSE`` on the last timing line, or None if
        there is no such line
    start_lines: dict
        line indices of the sections found: ``"optimization"`` (int, last
        marker), ``"mulliken"`` (list of int) and ``"final_geometry"`` (int)

    """
    errors = []
    warnings = []
    parser_errors = []
    mpi_abort = False
    telapse_line = None
    start_lines = {}
    second_opt_line = False
    # This is required since output looks like
    # OPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPT
    # STARTING GEOMETRY OPTIMIZATION - INFORMATION ON SCF MOVED TO SCFOUT.LOG
    # GEOMETRY OPTIMIZATION INFORMATION STORED IN OPTINFO.DAT
    # OPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPTOPT

    for lineno, line in enumerate(lines):
        if "WARNING" in line.upper():
            warnings.append(line.strip())
        elif "ERROR" in line:
            # TODO ignore errors before program execution (e.g. in mpiexec setup)?
            if "open_hca: getaddr_netdev ERROR" not in line:
                errors.append(line.strip())
        elif "SCF abnormal end" in line:
            # only present when run using runcry
            errors.append(line.strip())
        elif "MPI_Abort" in line:
            # only record one mpi_abort event (to not clutter output)
            if not mpi_abort:
                errors.append(line.strip())
                mpi_abort = True
        elif "CONVERGENCE TESTS UNSATISFIED" in line.upper():
            errors.append(line.strip())
        elif "TELAPSE" in line:
            # later matches overwrite earlier ones, so the last one wins
            telapse_line = lineno
        # search for an optimisation
        elif ("OPTOPTOPTOPT" in line or
              "CONVERGENCE ON GRADIENTS SATISFIED AFTER THE FIRST OPTIMIZATION CYCLE"
              in line):
            # a second marker is tolerated (see the banner above), a third is not
            if "optimization" in start_lines:
                if second_opt_line:
                    parser_errors.append(
                        "found two lines starting optimization section: "
                        "{0} and {1}".format(start_lines["optimization"],
                                             lineno))
                else:
                    second_opt_line = True
            start_lines["optimization"] = lineno
        # search for mulliken analysis
        elif line.strip().startswith("MULLIKEN POPULATION ANALYSIS"):
            # can have ALPHA+BETA ELECTRONS and ALPHA-BETA ELECTRONS (denoted in line above mulliken_starts)
            start_lines.setdefault("mulliken", []).append(lineno)
        # search for final geometry
        elif "FINAL OPTIMIZED GEOMETRY" in line:
            if "final_geometry" in start_lines:
                parser_errors.append(
                    "found two lines starting 'FINAL OPTIMIZED GEOMETRY':"
                    " {0} and {1}".format(start_lines["final_geometry"],
                                          lineno))
            start_lines["final_geometry"] = lineno

    total_seconds = None
    # compare with None explicitly: line index 0 is a valid match but is falsy
    if telapse_line is not None:
        total_seconds = int(
            split_numbers(lines[telapse_line].split("TELAPSE")[1])[0])

    return errors, warnings, parser_errors, total_seconds, start_lines
def _read_population_table(searchlines, start, key, table):
    """Read the rows of one Mulliken population table into ``table``.

    Rows are read from index ``start`` until an empty line, a line starting
    with a letter, or the end of ``searchlines``. A row containing a
    non-numeric token starts a new atom; a purely numeric row continues the
    populations (stored under ``key``) of the previous atom.
    """
    row = start
    while row < len(searchlines):
        text = searchlines[row].strip()
        if not text or text[0].isalpha():
            break
        fields = text.split()
        numbers = split_numbers(searchlines[row])
        if len(fields) != len(numbers):
            # the element symbol is not a number, so this row starts a new atom
            table.setdefault("ids", []).append(int(fields[0]))
            table.setdefault("symbols", []).append(
                fields[1].lower().capitalize())
            table.setdefault("atomic_numbers", []).append(int(fields[2]))
            table.setdefault("charges", []).append(float(fields[3]))
            table.setdefault(key, []).append([float(f) for f in fields[4:]])
        else:
            # populations can wrap multiple lines
            table[key][-1].extend(numbers)
        row += 1


def parse_mulliken_analysis(lines, mulliken_indices):
    """Read the Mulliken population analysis tables.

    For each index in ``mulliken_indices`` the preceding line names the kind
    of analysis (``ALPHA+BETA ELECTRONS`` or ``ALPHA-BETA ELECTRONS``). The
    lines up to the next index (or the end of ``lines``) are then searched
    for the atomic orbital and shell population tables.

    Parameters
    ----------
    lines: list[str]
    mulliken_indices: list[int]
        indices of the lines starting each Mulliken analysis

    Returns
    -------
    ParsedSection
        built from the first index and a dict with one entry per analysis,
        keyed by the lower-cased name with spaces replaced by underscores.
        A parser error is set if a name is not one of the two expected

    """
    mulliken = {}

    for i, indx in enumerate(mulliken_indices):
        name = lines[indx - 1].strip().lower()
        key_name = name.replace(" ", "_")
        if name not in ("alpha+beta electrons", "alpha-beta electrons"):
            return ParsedSection(
                mulliken_indices[0],
                mulliken,
                "was expecting mulliken to be alpha+beta or alpha-beta on line:"
                " {0}, got: {1}".format(indx - 1, lines[indx - 1]),
            )

        # only search up to the start of the next analysis, if there is one
        if len(mulliken_indices) > i + 1:
            searchlines = lines[indx + 1:mulliken_indices[i + 1]]
        else:
            searchlines = lines[indx + 1:]

        data_ao = {}
        data_shell = {}

        for j, line in enumerate(searchlines):
            header = line.strip()
            # the rows of a table start two lines after its header
            if fnmatch(header, "*ATOM*Z*CHARGE*A.O.*POPULATION*"):
                _read_population_table(searchlines, j + 2, "aos", data_ao)
            elif fnmatch(header, "*ATOM*Z*CHARGE*SHELL*POPULATION*"):
                _read_population_table(searchlines, j + 2, "shells",
                                       data_shell)

        # TODO check consistency of ids, ...
        data_ao.update(data_shell)
        mulliken[key_name] = data_ao

    return ParsedSection(mulliken_indices[0], mulliken)
def parse_optimisation(lines, initial_lineno):
    """Read the steps of a geometry optimisation.

    If the line at ``initial_lineno`` reports that the gradients converged
    after the first cycle, only the energy on the ``OPT END -`` line is read.
    Otherwise one dict is built per ``OPTIMIZATION ... POINT`` step, holding
    (when present) the SCF cycles, the geometry, the total corrected energy
    and the convergence flags. Steps in which the SCF did not converge are
    discarded.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int

    Returns
    -------
    ParsedSection
        built from the last line read and the list of steps; a parser error
        is set if the section is malformed or ``OPT END`` is never found

    Raises
    ------
    IOError
        if the energy on the ``OPT END -`` line of a single-cycle
        optimisation is not stated in atomic units

    """
    no_end_message = (
        "did not find 'OPT END', after optimisation start at line {}".format(
            initial_lineno))

    if ("CONVERGENCE ON GRADIENTS SATISFIED AFTER THE FIRST OPTIMIZATION CYCLE"
            in lines[initial_lineno]):
        # single cycle: only the final energy is available
        for curr_lineno, raw_line in enumerate(lines[initial_lineno:],
                                               start=initial_lineno):
            line = raw_line.strip()
            if "OPT END -" not in line:
                continue
            if not fnmatch(line, "*E(AU)*"):
                raise IOError("was expecting units in a.u. on line:"
                              " {0}, got: {1}".format(curr_lineno, line))
            energy = convert_units(split_numbers(line)[0], "hartree", "eV")
            return ParsedSection(curr_lineno,
                                 [{"energy": {"total_corrected": energy}}])
        return ParsedSection(curr_lineno, [], no_end_message)

    convergence_params = ("MAX GRADIENT", "RMS GRADIENT", "MAX DISPLAC",
                          "RMS DISPLAC")

    opt_cycles = []
    opt_cyc = None
    scf_start_no = None
    failed_opt_step = False

    for curr_lineno, raw_line in enumerate(lines[initial_lineno:],
                                           start=initial_lineno):
        line = raw_line.strip()

        if "OPT END -" in line:
            if opt_cyc and not failed_opt_step:
                opt_cycles.append(opt_cyc)
            return ParsedSection(curr_lineno, opt_cycles)

        if fnmatch(line, "*OPTIMIZATION*POINT*"):
            # a new step starts: store the previous one, unless it failed
            if opt_cyc is not None and not failed_opt_step:
                opt_cycles.append(opt_cyc)
            opt_cyc = {}
            scf_start_no = None
            failed_opt_step = False
        elif opt_cyc is None:
            # nothing to record before the first step
            continue

        # when using ONELOG optimisation key word
        if "CRYSTAL - SCF - TYPE OF CALCULATION :" in line:
            if scf_start_no is not None:
                return ParsedSection(
                    curr_lineno,
                    opt_cycles,
                    "found two lines starting scf ('CRYSTAL - SCF - ') in opt step {0}:"
                    .format(len(opt_cycles)) +
                    " {0} and {1}".format(scf_start_no, curr_lineno),
                )
            scf_start_no = curr_lineno
        elif "SCF ENDED" in line:
            # a line without 'CONVERGE' is not reported as an error here
            # NOTE(review): scf_start_no is still None if no SCF start line
            # was seen in this step, which makes the addition below fail --
            # confirm that cannot happen
            outcome = parse_scf_section(lines, scf_start_no + 1,
                                        curr_lineno + 1)
            # TODO test if error
            opt_cyc["scf"] = outcome.data

        parse_geometry_section(opt_cyc, curr_lineno, line, lines)

        # TODO move to read_post_scf?
        if fnmatch(line, "TOTAL ENERGY*DE*"):
            if not fnmatch(line, "TOTAL ENERGY*AU*DE*AU*"):
                return ParsedSection(
                    curr_lineno,
                    opt_cycles,
                    "was expecting units in a.u. on line:"
                    " {0}, got: {1}".format(curr_lineno, line),
                )
            step_energy = opt_cyc.setdefault("energy", {})
            step_energy["total_corrected"] = convert_units(
                split_numbers(line)[1], "hartree", "eV")

        for param in convergence_params:
            if not fnmatch(line, "{}*CONVERGED*".format(param)):
                continue
            flags = opt_cyc.setdefault("convergence", {})
            # the last word of the line says whether the criterion is met
            flags[param.lower().replace(" ", "_")] = bool(
                strtobool(line.split()[-1]))

        if fnmatch(
                line,
                "*SCF DID NOT CONVERGE. RETRYING WITH A SMALLER OPT STEP*"):
            # TODO add failed optimisation steps with dummy energy and extra parameter?
            # for now discard this optimisation step
            failed_opt_step = True

    if opt_cyc and not failed_opt_step:
        opt_cycles.append(opt_cyc)

    return ParsedSection(curr_lineno, opt_cycles, no_end_message)