def init_quantities(self):
    """Append the output-file quantities to the inherited quantity list.

    The parent ``init_quantities`` is called first; afterwards patterns for
    the Brillouin zone sampling block, the energy block, the magnetic
    moments, the dipole and the forces on the ions are added.
    """

    def str_to_energies(val_in):
        """Convert the 'name = value' lines of the energy block to a dict.

        The unit is eV when one of the '='-separated pieces of the first
        line equals '[eV]:' and hartree otherwise. Lines that do not split
        into exactly two pieces are ignored.
        """
        fields = [line.split('=') for line in val_in.strip().split('\n')]
        if '[eV]:' in fields[0]:
            unit = ureg.eV
        else:
            unit = ureg.hartree
        energies = {}
        for entry in fields:
            if len(entry) != 2:
                continue
            energies[entry[0].strip()] = float(entry[1]) * unit
        return energies

    def str_to_forces(val_in):
        """Return columns 3-5 of every five-column line as an array with unit.

        The unit is eV/angstrom when the first line contains the token
        '[eV/A]' and hartree/bohr otherwise.
        """
        rows = [line.split() for line in val_in.strip().split('\n')]
        if '[eV/A]' in rows[0]:
            unit = ureg.eV / ureg.angstrom
        else:
            unit = ureg.hartree / ureg.bohr
        components = [row[2:5] for row in rows if len(row) == 5]
        return np.array(components, dtype=float) * unit

    def str_to_moments(val_in):
        """Collect the third column of every line as a float array."""
        lines = val_in.strip().split('\n')
        return np.array([line.split()[2] for line in lines], dtype=float)

    super().init_quantities()

    # Quantities searched inside the Brillouin zone sampling block only.
    kpoint_quantities = [
        Quantity(
            'grid_dimensions',
            r'Dimensions of the k\-point grid\s*=(.*)', dtype=int),
        Quantity(
            'n_kpoints',
            r'Total number of k\-points\s*=(.*)', dtype=int),
        Quantity(
            'n_kpoints_reduced',
            r'Number of symmetry-reduced k\-points\s*=(.*)', dtype=int),
        Quantity(
            'kpoints',
            rf'\d+\s*({re_float})\s*({re_float})\s*({re_float})\s*({re_float})',
            dtype=float, repeats=True),
    ]

    new_quantities = [
        # NOTE: the spelling 'brilloiun' is part of the quantity name.
        Quantity(
            'brilloiun_zone_sampling',
            r'Brillouin zone sampling([\s\S]+?)\*+\n\n',
            sub_parser=TextParser(quantities=kpoint_quantities)),
        Quantity(
            'energies', r'Energy (\[[\s\S]+?)\n\n',
            str_operation=str_to_energies),
        Quantity(
            'total_magnetic_moment',
            rf'Total Magnetic Moment:\s*mz\s*=\s*({re_float})'),
        Quantity(
            'local_magnetic_moments',
            r'Ion\s*mz\s*([\w\s\.\-]+?)\n\n',
            convert=False, str_operation=str_to_moments),
        Quantity(
            'dipole',
            rf'Dipole:.*\[Debye\]\s*<x> =\s*\S+\s*({re_float})\s*'
            rf'<y> =\s*\S+\s*({re_float})\s*<z> =\s*\S+\s*({re_float})',
            dtype=float),
        Quantity(
            'forces',
            r'Forces on the ions (\[.*\]\s*)Ion\s*x\s*y\s*z\s*([\s\S]*)',
            str_operation=str_to_forces, convert=False),
    ]
    self._quantities += new_quantities
def init_quantities(self):
    """Define the quantities read from a trajectory dump made of ITEM sections.

    Four repeating quantities are registered: the time step, the number of
    atoms, the box bounds (converted to periodicity flags and a cell) and
    the per-atom table.

    The ``atoms_info`` pattern used to begin with a literal ``s*`` (a
    missing backslash); it now begins with ``\\s*`` like the other ITEM
    patterns. The captured groups are unaffected.
    """

    def get_pbc_cell(val):
        """Return (pbc, cell) from the box-bounds text.

        The first three whitespace-separated tokens are compared with 'pp'
        to obtain the periodicity flags; the following six tokens are read
        as (lo, hi) pairs whose differences fill the cell diagonal.
        """
        tokens = val.split()
        pbc = [flag == 'pp' for flag in tokens[:3]]
        cell = np.zeros((3, 3))
        for i in range(3):
            cell[i][i] = float(tokens[i * 2 + 4]) - float(tokens[i * 2 + 3])
        return pbc, cell

    def get_atoms_info(val):
        """Return a dict mapping each column header to its column of values.

        The first line holds the column names; the remaining non-empty
        lines are parsed as floats and sorted by their first column.
        """
        lines = val.split('\n')
        keys = lines[0].split()
        values = np.array([line.split() for line in lines[1:] if line], dtype=float)
        # Order the rows by the first column, then switch to column-major.
        values = values[values[:, 0].argsort()].T
        return {key: values[i] for i, key in enumerate(keys)}

    self._quantities = [
        Quantity(
            'time_step', r'\s*ITEM:\s*TIMESTEP\s*\n\s*(\d+)\s*\n',
            comment='#', repeats=True),
        Quantity(
            'n_atoms', r'\s*ITEM:\s*NUMBER OF ATOMS\s*\n\s*(\d+)\s*\n',
            comment='#', repeats=True),
        Quantity(
            'pbc_cell',
            r'\s*ITEM: BOX BOUNDS\s*([\s\w]+)([\+\-\d\.eE\s]+)\n',
            str_operation=get_pbc_cell, comment='#', repeats=True),
        Quantity(
            'atoms_info',
            r'\s*ITEM:\s*ATOMS\s*([ \w]+\n)*?([\+\-eE\d\.\n ]+)',
            str_operation=get_atoms_info, comment='#', repeats=True),
    ]
def init_quantities(self):
    """Define the 'switch', 'emin', 'smearing' and 'gmax' quantities."""
    # (quantity name, regular expression); no further options are passed.
    patterns = (
        ('switch', r'(TOT|FOR|QTL|EFG|FERMI)'),
        ('emin', r'([\d\.\- ]+)\s*EMIN'),
        ('smearing', r'(GAUSS|ROOT|TEMP|TETRA|ALL)\s*([\d\.]+)'),
        ('gmax', r'([\d\.\-]+)\s*GMAX'),
    )
    self._quantities = [Quantity(name, pattern) for name, pattern in patterns]
def init_quantities(self):
    """Define the 'xc_functional' and 'fft' quantities."""
    # Text following one of the listed switch keywords, kept as a string.
    xc_functional = Quantity(
        'xc_functional', r'(?:TOT|KXC|POT|MULT|COUL|EXCH)\s*([\w ]+)',
        dtype=str)
    # Three integers and one decimal number that follow the FFT keyword.
    fft = Quantity('fft', r'FFT[\s\S]+?(\d+\s+\d+\s+\d+\s+[\d\.]+)')
    self._quantities = [xc_functional, fft]
def init_quantities(self):
    """Define the 'wf_switch' and 'rkmax' quantities."""
    quantities = []
    quantities.append(
        Quantity('wf_switch', r'(WFFIL|WFPRI|ENFIL|SUPWF)'))
    # The numbers at the start of the line that contains 'K-MAX'.
    quantities.append(
        Quantity(
            'rkmax', r'([\d\.]+\s*\d+\s*\d+).+K\-MAX', dtype=np.float64))
    self._quantities = quantities
def init_quantities(self):
    """Define one quantity per entry of ``self._headers`` and ``self._sections``.

    Header quantities capture the numbers that precede the header keyword
    on a line. Section quantities capture the text after the section
    keyword and convert it with ``get_section_value``.

    Compared with the previous version, the section converter no longer
    raises ``IndexError`` when the first line of the captured text is empty,
    and it only stops on ``ValueError`` (a non-numeric row) instead of on
    any exception.
    """

    def get_section_value(val):
        """Return (name, rows) for the captured text of one section.

        ``name`` is the text after a leading '#' on the first line, or
        ``None`` when the first line does not start with '#'. ``rows`` is
        an array built from every following line that parses as floats;
        trailing '#' comments are removed, empty lines are skipped, and
        parsing stops at the first line that is not purely numeric.
        """
        lines = val.split('\n')
        name = None
        # startswith() is safe for an empty first line, unlike lines[0][0].
        if lines[0].startswith('#'):
            name = lines[0][1:].strip()
            lines = lines[1:]
        rows = []
        for line in lines:
            fields = line.split('#')[0].split()
            if not fields:
                continue
            try:
                rows.append(np.array(fields, dtype=float))
            except ValueError:
                # First non-numeric line marks the end of the data rows.
                break
        return name, np.array(rows)

    self._quantities = []
    for header in self._headers:
        self._quantities.append(Quantity(
            header, r'\s*([\+\-eE\d\. ]+)\s*%s\s*\n' % header,
            comment='#', repeats=True))

    for section in self._sections:
        self._quantities.append(Quantity(
            section,
            r'\s*%s\s*(#*\s*[\s\S]*?\n)\n*([\deE\-\+\.\s]+)\n' % section,
            str_operation=get_section_value, repeats=True))
def init_quantities(self):
    """Define the quantities read from lines starting with 'phonon <keyword>'.

    Registered keywords: displacement, symmetry_thresh, frequency_unit,
    supercell and nac.
    """

    def str_to_nac(val_in):
        """Convert the text after 'phonon nac' into a dict.

        The first token is stored as ``file`` and the second, lower-cased,
        as ``method``. Any further tokens are the delta components; they
        start directly after the method, i.e. at index 2. The previous
        slice ``val[3:6]`` skipped the first component and returned at most
        two values for a five-token line.
        """
        val = val_in.strip().split()
        nac = dict(file=val[0], method=val[1].lower())
        if len(val) > 2:
            nac['delta'] = [float(v) for v in val[2:5]]
        return nac

    def str_to_supercell(val_in):
        """Convert the supercell specification into a 3x3 integer matrix.

        Three integers are placed on the diagonal; any other count is
        reshaped to (3, 3), which requires exactly nine integers.
        """
        val = [int(v) for v in val_in.strip().split()]
        if len(val) == 3:
            return np.diag(val)
        return np.reshape(val, (3, 3))

    self._quantities = [
        Quantity(
            'displacement', r'\n *phonon displacement\s*([\d\.]+)',
            dtype=float),
        Quantity(
            'symmetry_thresh', r'\n *phonon symmetry_thresh\s*([\d\.]+)',
            dtype=float),
        Quantity('frequency_unit', r'\n *phonon frequency_unit\s*(\S+)'),
        Quantity(
            'supercell', r'\n *phonon supercell\s*(.+)',
            str_operation=str_to_supercell),
        Quantity(
            'nac', r'\n *phonon nac\s*(.+)', str_operation=str_to_nac),
    ]
def init_quantities(self):
    """Define the 'smearing_width', 'charge' and 'xc_functional' quantities.

    Each quantity is read from a 'keyword = value' assignment. The charge
    pattern now accepts an optional leading sign; previously a value such
    as '-1' could not match because '-' was not part of the captured
    character class.
    """
    self._quantities = [
        Quantity(
            'smearing_width', r'electron_temperature *\= *([\d\.]+)',
            dtype=float),
        # Optional sign so that negatively charged systems are captured too.
        Quantity('charge', r'charge *\= *([\-\+]?[\d\.]+)', dtype=float),
        Quantity('xc_functional', r'exchange_correlation *\= *(\S+)'),
    ]
def init_quantities(self):
    """Define the 'labels' header line and the repeating 'data' rows."""
    # Decimal number with any count of leading minus signs.
    number = r'\-*\d+\.\d+'
    labels = Quantity('labels', r'# ENERGY +(.+)', flatten=False)
    # A data row starts with two decimal numbers separated by spaces.
    data = Quantity(
        'data', rf'({number} +{number}.*)',
        repeats=True, dtype=np.dtype(np.float64))
    self._quantities = [labels, data]
def init_quantities(self):
    """Define one quantity per command plus version, finish flag and thermo data.

    A repeating quantity is created for every name in ``self._commands``;
    then 'program_version', 'finished' and 'thermo_data' are appended.
    """

    def str_op(val):
        """Return the command arguments as a list, or as a single token.

        Text after the first '#' is dropped and '&' line continuations are
        joined before splitting on whitespace.
        """
        tokens = val.split('#')[0].replace('&\n', ' ').split()
        if len(tokens) > 1:
            return tokens
        return tokens[0]

    def str_to_thermo(val):
        """Convert a thermo block into a dict of columns keyed by label.

        When 'Step' occurs more than once, the text is read as a sequence
        of 'label value' pairs: every token that starts with a letter is
        a label and the token after it is its value. Otherwise the first
        line holds the column labels and the remaining lines the table.
        """
        if val.count('Step') > 1:
            tokens = val.replace('--', '').replace('=', '').replace('(sec)', '').split()
            thermo = {}
            for position, token in enumerate(tokens):
                if token[0].isalpha():
                    thermo.setdefault(token, []).append(float(tokens[position + 1]))
            return thermo

        header, *rows = val.split('\n')
        keys = header.split()
        columns = np.array([row.split() for row in rows if row], dtype=float).T
        return {key: columns[index] for index, key in enumerate(keys)}

    command_quantities = []
    for name in self._commands:
        pattern = r'\n\s*%s\s+([\w\. \/\#\-]+)(\&\n[\w\. \/\#\-]*)*' % name
        command_quantities.append(Quantity(
            name, pattern, str_operation=str_op, comment='#', repeats=True))
    self._quantities = command_quantities

    self._quantities.extend([
        Quantity(
            'program_version', r'\s*LAMMPS\s*\(([\w ]+)\)\n',
            dtype=str, repeats=False, flatten=False),
        Quantity(
            'finished', r'\s*Dangerous builds\s*=\s*(\d+)', repeats=False),
        Quantity(
            'thermo_data',
            r'\s*\-*(\s*Step\s*[\-\s\w\.\=\(\)]*[ \-\.\d\n]+)Loop',
            str_operation=str_to_thermo, repeats=False, convert=False),
    ])
def init_quantities(self):
    """Define the repeating 'deformation' quantity (Lagrangian strain tuple)."""

    def split_strain(text):
        """Remove the commas and split the strain tuple on whitespace."""
        return text.replace(',', '').split()

    deformation = Quantity(
        'deformation',
        r'Lagrangian strain\s*=\s*\(([eta\s\d\.,]+)\)',
        str_operation=split_strain, repeats=True, dtype=str)
    self._quantities = [deformation]
def init_quantities(self):
    """Define the quantities read from 'label = value' information lines."""
    # (quantity name, regular expression, extra keyword arguments)
    specs = (
        ('order', r'\s*Order of elastic constants\s*=\s*([0-9]+)',
         dict(repeats=False, dtype=int)),
        ('calculation_method', r'\s*Method of calculation\s*=\s*([-a-zA-Z]+)\s*',
         dict(repeats=False)),
        ('code_name', r'\s*DFT code name\s*=\s*([-a-zA-Z]+)',
         dict(repeats=False)),
        ('space_group_number', r'\s*Space-group number\s*=\s*([0-9]+)',
         dict(repeats=False)),
        ('equilibrium_volume', r'\s*Volume of equilibrium unit cell\s*=\s*([0-9.]+)\s*',
         dict(unit='angstrom ** 3')),
        ('max_strain', r'\s*Maximum Lagrangian strain\s*=\s*([0-9.]+)',
         dict(repeats=False)),
        ('n_strains', r'\s*Number of distorted structures\s*=\s*([0-9]+)',
         dict(repeats=False)),
    )
    self._quantities = [
        Quantity(name, pattern, **options) for name, pattern, options in specs]
def init_quantities(self):
    """Define the 'eigenvalues' quantity and its nested sub-quantities.

    The outer quantity captures the text starting at 'Eigenvalues ['; the
    sub-parser extracts the per-k-point eigenvalue tables, the energy unit
    and the Fermi energy from it.
    """

    def str_to_eigenvalues(val_in):
        """Convert one eigenvalue table into (kpoint, eigenvalues, occupations).

        The first line is either the '#st' marker, in which case the
        k-point is taken as the origin, or the comma-separated k-point
        coordinates. Of every following row, columns 1, 3 and 4 are read
        as state index, eigenvalue and occupation. Returns ``None`` when
        there are no rows.

        A table with a single row no longer raises ``IndexError``: the
        second state index is only inspected when it exists.
        """
        val = [v.split() for v in val_in.strip().split('\n')]
        if val[0][0] == '#st':
            kpoint = [0., 0., 0.]
        else:
            kpoint = [float(v.rstrip(',')) for v in val[0]]
        if len(val) == 1:
            return None

        table = np.array([[v[0], v[2], v[3]] for v in val[1:]], dtype=float).T
        states = table[0]
        # Two consecutive rows with state index 1 mean two spin channels.
        nspin = 2 if len(states) > 1 and states[1] == 1.0 else 1
        nbands = int(max(states))
        # reshape() instead of assigning to .shape: same layout, but it does
        # not depend on the array being reshapeable in place.
        energies, occupations = table[1:].reshape((2, nbands, nspin))
        return kpoint, energies, occupations

    def str_to_fermi_energy(val_in):
        """Return the Fermi energy with unit.

        The unit is eV when the second token starts with 'e' and hartree
        otherwise.
        """
        val = val_in.split()
        unit = ureg.eV if val[1].startswith('e') else ureg.hartree
        return float(val[0]) * unit

    self._quantities = [
        Quantity(
            'eigenvalues', r'(Eigenvalues \[[\s\S]+?)(?:\n\n|\Z)',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'eigenvalues',
                    r'(?:(#st)\s*Spin\s*Eigenvalue\s*Occupation|#k =\s*\d+, k = \(([\-\d\.,\s]+)\))([\d\s\.\-updn]+)',
                    str_operation=str_to_eigenvalues, repeats=True,
                    convert=False),
                Quantity(
                    'unit', r'Eigenvalues \[(.*)\]', convert=False,
                    str_operation=lambda x: 'eV' if x == 'eV' else 'hartree'),
                Quantity(
                    'fermi_energy', rf'Fermi energy =\s*({re_float} .*)',
                    convert=False, str_operation=str_to_fermi_energy),
            ])),
    ]
def init_quantities(self):
    """Define the elastic-constant result quantities.

    Registered quantities: the symbolic 'voigt' matrix, the
    'elastic_constant' and 'compliance' matrices, the repeating 'modulus'
    entries and the 'eigenvalues' of the stiffness matrix.
    """

    def str_to_modulus(val_in):
        """Return (key, value) for one 'name = number [unit]' entry.

        The value is multiplied by GPa only when the entry consists of
        exactly three tokens; otherwise it stays a plain float.
        """
        parts = val_in.strip().split()
        key = parts[0]
        unit = parts[-1] if len(parts) == 3 else None
        value = float(parts[1])
        if unit is not None:
            value = value * ureg.GPa
        return key, value

    matrix_quantities = [
        Quantity(
            'voigt',
            r'Symmetry[\s\S]+\n\s*\n([C\d\s\n\(\)\-\+\/\*]+)\n',
            shape=(6, 6), dtype=str, repeats=False),
        Quantity(
            'elastic_constant',
            r'Elastic constant[\s\S]+in GPa\s*:\s*\n\n([\-\d\.\s\n]+)\n',
            shape=(6, 6), dtype=float, unit='GPa', repeats=False),
        Quantity(
            'compliance',
            r'Elastic compliance[\s\S]+in 1/GPa\s*:\s*\n\n([\-\d\.\s\n]+)\n',
            shape=(6, 6), dtype=float, unit='1/GPa', repeats=False),
    ]
    modulus = Quantity(
        'modulus', r',\s*(\w+)\s*=\s*([\-\+\w\. ]+?)\n',
        str_operation=str_to_modulus, repeats=True)
    eigenvalues = Quantity(
        'eigenvalues',
        r'Eigenvalues of elastic constant \(stiffness\) matrix:\s*\n+([\-\d\.\n\s]+)\n',
        unit='GPa', repeats=False)

    self._quantities = matrix_quantities + [modulus, eigenvalues]
def init_quantities(self):
    """Define the repeating 'line' quantity for 'name = value' assignments."""

    def str_to_line(val_in):
        """Split an assignment into [name, value] without quotes or comments.

        Quote characters are removed, the text is split at the first '='
        and everything after a '#' is dropped from each part. Whitespace
        is stripped after the comment has been removed; stripping first,
        as before, left trailing blanks on values followed by a comment
        (for example 'a = 5 # note' gave '5 ').
        """
        parts = val_in.replace('"', '').replace("'", '').split('=', 1)
        return [part.split('#')[0].strip() for part in parts]

    self._quantities = [
        Quantity(
            'line', r'(\w.*\s*=\s*.*)#?',
            str_operation=str_to_line, repeats=True),
    ]
def init_quantities(self):
    """Define the repeating 'fit' quantity ('<n> order fit.' blocks)."""

    def split_eta_val(val):
        """Return (order, even-position numbers, odd-position numbers).

        The text before ' order fit.' is the order label; the numbers that
        follow are de-interleaved into two lists.
        """
        order, body = val.strip().split(' order fit.')
        numbers = list(map(float, body.strip().split()))
        return order, numbers[0::2], numbers[1::2]

    fit = Quantity(
        'fit', r'(\w+ order fit\.\n[\d.\s\neE\-\+]+)\n',
        repeats=True, convert=False, str_operation=split_eta_val)
    self._quantities = [fit]
def init_quantities(self):
    """Define the 'cellpar' and 'sym_pos' quantities of the structure block."""

    def get_sym_pos(val):
        """Return dict(symbols=..., positions=...) from 'x y z symbol' records.

        The text is split on any whitespace and consumed in groups of four
        tokens: three coordinates followed by the symbol. Newlines were
        previously deleted before splitting, which fused the symbol at the
        end of one line with the first coordinate of the next line when
        that line had no leading blank; ``split()`` already treats newlines
        as separators, so the deletion is dropped.
        """
        tokens = val.split()
        symbols = []
        positions = []
        for start in range(0, len(tokens), 4):
            symbols.append(tokens[start + 3].strip())
            positions.append([float(tokens[j]) for j in range(start, start + 3)])
        return dict(symbols=symbols, positions=positions)

    self._quantities = [
        Quantity(
            'cellpar',
            r'a\s*b\s*c\n([\d\.\s]+)\n\s*alpha\s*beta\s*gamma\n([\d\.\s]+)\n+',
            repeats=False),
        Quantity(
            'sym_pos', r'Atom positions:\n\n([\s\d\.A-Za-z]+)\n\n',
            str_operation=get_sym_pos, repeats=False, convert=False),
    ]
def init_quantities(self):
    """Define the 'elastic_constant' symbol matrix and the 'cijk' values."""

    def arrange_matrix(val):
        """Cut the 12x18 token table into six 6x6 nested lists.

        Blank lines are ignored. The blocks are returned row-block by
        row-block, three column-blocks each.
        """
        rows = [line.strip().split() for line in val.strip().split('\n') if line.strip()]
        matrix = np.array(rows).reshape((12, 18))
        return [
            matrix[6 * i:6 * (i + 1), 6 * j:6 * (j + 1)].tolist()
            for i in range(2) for j in range(3)
        ]

    elastic_constant = Quantity(
        'elastic_constant',
        r'\%\s*\n([\s0-6A-L]*)[\n\s\%1-6\-ij]*([\s0-6A-L]*)\n',
        str_operation=arrange_matrix, dtype=str, repeats=False,
        convert=False)
    cijk = Quantity(
        'cijk', r'(C\d\d\d)\s*=\s*([\-\d\.]+)\s*GPa',
        repeats=True, convert=False)
    self._quantities = [elastic_constant, cijk]
def init_quantities(self):
    """Append the repeating 'block' quantity ('%'-delimited blocks).

    The parent ``init_quantities`` runs first; the block quantity is added
    to whatever it defined.
    """

    def str_to_block(val_in):
        """Return [name, row, row, ...] for one '%'-delimited block.

        Comments starting with '#' are cut, lines that are empty afterwards
        are dropped, quotes are removed and every line is split at '|'.
        The first line is reduced to its first field.
        """
        block = []
        for line in val_in.strip().split('\n'):
            content = line.split('#')[0]
            if not content:
                continue
            block.append(content.replace('"', '').replace("'", '').split('|'))
        block[0] = block[0][0]
        return block

    super().init_quantities()
    block_quantity = Quantity(
        'block', r'%([\s\S]+?)%', repeats=True, str_operation=str_to_block)
    self._quantities += [block_quantity]
def init_quantities(self):
    """Define the repeating 'atoms_info' quantity ('type x y z' records).

    The converter no longer raises ``IndexError`` on lines that are empty
    once a trailing '#' comment has been removed.
    """

    def get_atoms_info(val_in):
        """Return dict(type, x, y, z) of float columns for one frame.

        Alphabetic labels in the first column are replaced by a 1-based
        index in order of first appearance. Only lines with exactly four
        fields contribute to the result.
        """
        rows = [line.split('#')[0].split() for line in val_in.strip().split('\n')]
        symbols = []
        for row in rows:
            # Skip blank/comment-only lines; they are filtered out below.
            if row and row[0].isalpha():
                if row[0] not in symbols:
                    symbols.append(row[0])
                row[0] = symbols.index(row[0]) + 1
        columns = np.transpose(
            np.array([row for row in rows if len(row) == 4], dtype=float))
        return dict(type=columns[0], x=columns[1], y=columns[2], z=columns[3])

    # NOTE(review): this assigns ``self.quantities`` whereas the sibling
    # parsers assign ``self._quantities``; presumably the base class exposes
    # a ``quantities`` setter - confirm before changing.
    self.quantities = [
        Quantity(
            'atoms_info',
            r'((?:\d+|[A-Z][a-z]?) [\s\S]+?)(?:\s\d+\n|\Z)',
            str_operation=get_atoms_info, comment='#', repeats=True),
    ]
def init_quantities(self):
    """Append the repeating 'block' quantity ('Opened block' ... 'Closed block').

    The parent ``init_quantities`` runs first; the block quantity is added
    to whatever it defined.
    """

    def str_to_block(val_in):
        """Return [name, values] for one logged block.

        The first line, without quotes, is the block name. From every
        following line the text between the first and second '=' (comment
        removed, quotes removed, stripped) is collected as a value.
        """
        header, *entries = val_in.strip().split('\n')
        name = header.strip().replace('"', '').replace("'", '')
        values = []
        for entry in entries:
            value = entry.split('#')[0].split('=')[1]
            values.append(value.replace('"', '').replace("'", '').strip())
        return [name, values]

    super().init_quantities()
    block_quantity = Quantity(
        'block', r'Opened block([\s\S]+?)Closed block',
        repeats=True, str_operation=str_to_block)
    self._quantities += [block_quantity]
def init_quantities(self):
    """Define the 'lattice' header section and the repeating 'atom' sections.

    Each section is handed to a nested ``TextParser`` holding the
    quantities searched inside that section.
    """
    number = r'[\d\.\-]+'
    lattice_number = r'\d+\.\d{6}'

    def to_unit(symbol):
        """Look the captured character up in ``self._units_map``; default bohr."""
        return self._units_map.get(symbol, ureg.bohr)

    # fixed precision, sometimes no spaces
    lattice_constants_pattern = r'\s*'.join([rf'({lattice_number})'] * 6)

    # Everything before the first line that starts with 'AT'.
    lattice_section = Quantity(
        'lattice', r'([\s\S]+?)\n *AT',
        sub_parser=TextParser(quantities=[
            Quantity('nonequiv_atoms', r'NONEQUIV\.ATOMS\:\s*(\d+)', dtype=int),
            Quantity('lattice', r'(\w+)\s*LATTICE'),
            Quantity('calc_mode', r'(N*REL\S*)'),
            Quantity(
                'lattice_constants', lattice_constants_pattern,
                dtype=np.dtype(np.float64)),
            # Only a single word character is captured after 'unit='.
            Quantity('unit', r'unit=(\w)', str_operation=to_unit),
        ]))

    # One section per atom block, from its 'X=' entry up to 'LOCAL'.
    atom_section = Quantity(
        'atom', r'OM\s+\-*\d+\:\s*(X\=[\s\S]+?)LOCAL',
        repeats=True,
        sub_parser=TextParser(quantities=[
            Quantity(
                'positions',
                rf'X\=({number})\s*Y=({number})\s*Z=({number})',
                repeats=True, dtype=np.dtype(np.float64)),
            Quantity('atom_name', r'(\n *[A-Z][a-z]*\d* +)'),
            Quantity('NPT', r'NPT\s*\=\s*(\d+)', dtype=int),
            Quantity('R0', r'R0\s*\=\s*(\d+)', dtype=int),
            Quantity('Z', r'Z\:\s*([\d\.]+)', dtype=np.float64),
        ]))

    self._quantities = [lattice_section, atom_section]
def init_quantities(self):
    """Define the quantity tree used to parse an ORCA output file.

    The method builds several reusable lists of ``Quantity`` objects (basis
    set, grid, SCF, population analysis, TD-DFT, MP2, CI) and nests them via
    ``TextParser`` sub-parsers below two top-level sections: 'single_point'
    and 'geometry_optimization'. It also stores two mappings on the
    instance, ``self._energy_mapping`` and ``self._timing_mapping``, whose
    keys are regular-expression fragments of output labels and whose values
    are the quantity names they are stored under.

    NOTE(review): ``Quantity`` and ``TextParser`` are defined outside this
    block; the exact meaning of options such as ``repeats``, ``convert`` and
    ``flatten`` should be checked against their definitions.
    """
    # Pattern for a float, including sign and exponent characters.
    re_float = r'[\-\+\d\.Ee]+'

    # Output label (regex fragment) -> quantity name; consumed by the
    # 'total_scf_energy' sub-parser below.
    # NOTE: 'kinetc_energy' is spelled like this in the quantity name.
    self._energy_mapping = {
        'Total Energy': 'energy_total', 'Nuclear Repulsion': 'nuc_repulsion',
        'Electronic Energy': 'elec_energy', 'One Electron Energy': 'one_elec_energy',
        'Two Electron Energy': 'two_elec_energy', 'Potential Energy': 'potential_energy',
        'Kinetic Energy': 'kinetc_energy', r'E\(X\)': 'exchange_energy',
        r'E\(C\)': 'correlation_energy', r'E\(XC\)': 'exchange_correlation_energy'}

    # Output label (regex fragment) -> quantity name; consumed by the
    # 'timings' sub-parser below.
    self._timing_mapping = {
        'Total time': 'final_time', 'Sum of individual times': 'sum_individual_times',
        'Fock matrix formation': 'fock_matrix_formation', 'Coulomb formation': 'coulomb_formation',
        r'Split\-RI-J': 'split_rj', 'XC integration': 'xc_integration',
        r'Basis function eval\.': 'basis_fn_evaluation', r'Density eval\.': 'density_evaluation',
        r'XC\-Functional eval\.': 'xc_functional_evaluation',
        r'XC\-Potential eval\.': 'potential_evaluation', 'Diagonalization': 'diagonalization',
        'Density matrix formation': 'density_matrix_formation',
        'Population analysis': 'population_analysis', 'Initial guess': 'initial_guess',
        'Orbital Transformation': 'orbital_transformation',
        'Orbital Orthonormalization': 'orbital_orthonormalization',
        'DIIS solution': 'diis_solution', 'Grid generation': 'grid_generation'}

    def str_to_cartesian_coordinates(val_in):
        # Each line is 'label x y z'; the label is truncated to its first
        # two characters and the coordinates are returned in angstrom.
        val = [v.split() for v in val_in.strip().split('\n')]
        symbols = [v[0][:2] for v in val]
        coordinates = np.array([v[1:4] for v in val], dtype=float)
        return symbols, coordinates * ureg.angstrom

    # Shared by the 'basis_set' and 'auxiliary_basis_set' sections.
    basis_set_quantities = [
        Quantity('basis_set_atom_labels', r'Type\s*(\w+)', repeats=True),
        Quantity('basis_set', r':\s*(\w+)\s*contracted\s*to', repeats=True),
        Quantity('basis_set_contracted', r'(\w+)\s*pattern', repeats=True)]

    # Integer counters of the form '<label> .... <n>'.
    basis_set_statistics_quantities = [
        Quantity(
            'nb_of_primitive_gaussian_shells',
            r'# of primitive gaussian shells\s*\.+\s*(\d+)',
            repeats=True, dtype=int),
        Quantity(
            'nb_of_primitive_gaussian_functions',
            r'# of primitive gaussian functions\s*\.+\s*(\d+)',
            repeats=True, dtype=int),
        Quantity(
            'nb_of_contracted_shells',
            r'# of contracted shells\s*\.+\s*(\d+)',
            repeats=True, dtype=int),
        Quantity(
            'nb_of_contracted_basis_functions',
            r'# of contracted (?:aux-)?basis functions\s*\.+\s*(\d+)',
            repeats=True, dtype=int),
        Quantity(
            'highest_angular_moment',
            r'Highest angular momentum\s*\.+\s*(\d+)',
            repeats=True, dtype=int),
        Quantity(
            'maximum_contraction_depth',
            r'Maximum contraction depth\s*\.+\s*(\d+)',
            repeats=True, dtype=int)]

    # Shared by the 'dft_grid_generation' and 'final_grid' sections.
    grid_quantities = [
        Quantity(
            'gral_integ_accuracy',
            rf'General Integration Accuracy\s*IntAcc\s*\.+\s*({re_float})',
            dtype=float),
        Quantity(
            'radial_grid_type',
            r'Radial Grid Type\s*RadialGrid\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'angular_grid',
            r'Angular Grid \(max\. acc\.\)\s*AngularGrid\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'grid_pruning_method',
            r'Angular grid pruning method\s*GridPruning\s*\.+\s*(.+)',
            flatten=False, convert=False),
        Quantity(
            'weight_gener_scheme',
            r'Weight generation scheme\s*WeightScheme\s*\.+\s*(\w+)', convert=False),
        Quantity(
            'basis_fn_cutoff',
            rf'Basis function cutoff\s*BFCut\s*\.+\s*({re_float})', dtype=float),
        Quantity(
            'integr_weight_cutoff',
            rf'Integration weight cutoff\s*WCut\s*\.+\s*({re_float})', dtype=float),
        Quantity(
            'nb_grid_pts_after_initial_pruning',
            r'# of grid points \(after initial pruning\)\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_grid_pts_after_weights_screening',
            r'# of grid points \(after weights\+screening\)\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'total_nb_grid_pts',
            r'Total number of grid points\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'total_nb_batches',
            r'Total number of batches\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'avg_nb_points_per_batch',
            r'Average number of points per batch\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'avg_nb_grid_pts_per_atom',
            r'Average number of grid points per atom\s*\.+\s*(\d+)', dtype=int)]

    # One quantity per convergence label; the quantity name is the label
    # lower-cased with blanks and dashes replaced by underscores. Each
    # pattern captures two floats (the value and the tolerance).
    scf_convergence_quantities = [
        Quantity(
            name.lower().replace(' ', '_').replace('-', '_'),
            rf'%s\s*\.+\s*({re_float})\s* Tolerance :\s*({re_float})' % name,
            dtype=float, unit=ureg.hartree) for name in [
                'Last Energy change', 'Last MAX-Density change', 'Last RMS-Density change']]

    # Used inside the Mulliken population analysis section.
    population_quantities = [
        Quantity(
            'atomic_charges',
            r'[A-Z]+ ATOMIC CHARGES.*\n\-+([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=[
                Quantity('species', r'\n *\d+\s*(\w+)', repeats=True),
                Quantity('charge', rf':\s*({re_float})', repeats=True, dtype=float),
                Quantity(
                    'total_charge', rf'Sum of atomic charges\s*:\s*({re_float})',
                    dtype=float)])),
        Quantity(
            'orbital_charges',
            r'[A-Z]+ REDUCED ORBITAL CHARGES.*\s*\-+([\s\S]+?\n\n)',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'atom', r'([A-Z][a-z]?\s*[spdf][\s\S]+?)\n *(?:\d|\Z)',
                    repeats=True, sub_parser=TextParser(quantities=[
                        Quantity('species', r'([A-Z][a-z]?)', convert=False),
                        Quantity('charge', rf'([spdf]\S*)\s*:\s*({re_float})', repeats=True)]))]))]

    # Everything searched inside one SCF section.
    self_consistent_quantities = [
        Quantity(
            'scf_settings',
            r'SCF SETTINGS\s*\-+([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=[
                # 'XC_functional_type' is defined twice with different
                # patterns (ab initio Hamiltonian / density functional).
                Quantity(
                    'XC_functional_type',
                    r'Ab initio Hamiltonian\s*Method\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'XC_functional_type',
                    r'Density Functional\s*Method\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'exchange_functional',
                    r'Exchange Functional\s*Exchange\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'xalpha_param',
                    rf'X-Alpha parameter\s*XAlpha\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'beckes_beta_param',
                    rf'Becke\'s b parameter\s*XBeta\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'correl_functional',
                    r'Correlation Functional Correlation\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'lda_part_of_gga_corr',
                    r'LDA part of GGA corr\.\s*LDAOpt\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'scalar_relativistic_method',
                    r'Scalar relativistic method\s*\.+\s*(\w+)', convert=False),
                Quantity(
                    'speed_of_light_used',
                    rf'Speed of light used\s*Velit\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'hf_type',
                    r'Hartree-Fock type\s*HFTyp\s*\.+\s*(\w+)', convert=False),
                Quantity(
                    'total_charge',
                    rf'Total Charge\s*Charge\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'multiplicity',
                    rf'Multiplicity\s*Mult\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'nelectrons',
                    rf'Number of Electrons\s*NEL\s*\.+\s*({re_float})', dtype=float),
                Quantity(
                    'nuclear_repulsion',
                    rf'Nuclear Repulsion\s*ENuc\s*\.+\s*({re_float})',
                    dtype=float, unit=ureg.hartree),
                Quantity(
                    'convergence_check_mode',
                    r'Convergence Check Mode ConvCheckMode\s*\.+\s*(\S+)', convert=False),
                Quantity(
                    'energy_change_tolerance',
                    rf'Energy Change\s*TolE\s*\.+\s*({re_float})',
                    dtype=float, unit=ureg.hartree),
                Quantity(
                    '1_elect_energy_change',
                    rf'1\-El\. energy change\s*\.+\s*({re_float})', dtype=float)])),
        Quantity(
            'dft_grid_generation',
            r'DFT GRID GENERATION\s*\-+([\s\S]+?\-{10})',
            sub_parser=TextParser(quantities=grid_quantities)),
        Quantity(
            'scf_iterations',
            r'SCF ITERATIONS\s*\-+([\s\S]+?)\*{10}',
            sub_parser=TextParser(quantities=[Quantity(
                'energy', rf'\n *\d+\s*({re_float})',
                repeats=True, dtype=float, unit=ureg.hartree)])),
        Quantity(
            'final_grid',
            r'Setting up the final grid:([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=grid_quantities)),
        Quantity(
            'total_scf_energy',
            r'TOTAL SCF ENERGY\s*\-+([\s\S]+?)\-{10}',
            # One hartree-valued quantity per entry of the energy mapping,
            # followed by the dimensionless virial ratio / electron counts.
            sub_parser=TextParser(quantities=[
                Quantity(
                    name, rf'%s\s*:\s*({re_float})' % key,
                    dtype=float, unit=ureg.hartree)
                for key, name in self._energy_mapping.items()] + [
                Quantity(
                    'virial_ratio', rf'Virial Ratio\s*:\s*({re_float})', dtype=float),
                Quantity(
                    'nb_elect_alpha_channel', rf'N\(Alpha\)\s*:\s*({re_float})', dtype=float),
                Quantity(
                    'nb_elect_beta_channel', rf'N\(Beta\)\s*:\s*({re_float})', dtype=float),
                Quantity(
                    'nb_elect_total', rf'N\(Total\)\s*:\s*({re_float})', dtype=float)])),
        Quantity(
            'scf_convergence',
            r'SCF CONVERGENCE\s*\-+([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=scf_convergence_quantities)),
        Quantity(
            'orbital_energies',
            r'NO\s*OCC\s*E\(Eh\)\s*E\(eV\)\s*([\s\S]+?)\n\n',
            # First four columns of every line of the orbital table.
            str_operation=lambda x: np.array([v.split()[:4] for v in x.split('\n')], dtype=float),
            repeats=True),
        Quantity(
            'mulliken',
            r'MULLIKEN POPULATION ANALYSIS \*\s*\*+([\s\S]+?)\*{10}',
            sub_parser=TextParser(quantities=population_quantities)),
        Quantity(
            'timings',
            r'\n *TIMINGS\s*\-+\s*([\s\S]+?)\-{10}',
            # One quantity in seconds per entry of the timing mapping.
            sub_parser=TextParser(quantities=[Quantity(
                name, rf'%s\s*\.+\s*({re_float})' % key, dtype=float, unit=ureg.s)
                for key, name in self._timing_mapping.items()]))
    ]

    # TODO parse more properties, add to metainfo
    tddft_quantities = [
        Quantity(
            'absorption_spectrum_electric',
            r'ABSORPTION SPECTRUM VIA TRANSITION ELECTRIC DIPOLE MOMENTS\s*'
            r'\-+[\s\S]+?\-+\n([\s\S]+?)\-{10}',
            # Table rows are kept as whitespace-split strings.
            str_operation=lambda x: np.array(
                [v.split() for v in x.strip().split('\n')]))]

    # TODO parse more properties, add to metainfo
    mp2_quantities = [
        Quantity(
            'mp2_basis_dimension',
            r'Dimension of the basis\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'scaling_mp2_energy',
            rf'Overall scaling of the MP2 energy\s*\.+\s*({re_float})', dtype=float),
        Quantity(
            'mp2_aux_basis_dimension',
            r'Dimension of the aux\-basis\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'energy_method_current',
            rf'RI\-MP2 CORRELATION ENERGY:\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'energy_total',
            rf'MP2 TOTAL ENERGY:\s*({re_float})', dtype=float, unit=ureg.hartree)]

    def str_to_iteration_energy(val_in):
        # The first line holds the column names; only rows with as many
        # fields as there are names are kept. Returns name -> column.
        val = [v.split() for v in val_in.strip().split('\n')]
        keys = val[0]
        val = np.transpose(
            np.array([v for v in val[1:] if len(v) == len(keys)], dtype=float))
        return {keys[i]: val[i] for i in range(len(keys))}

    # Quantities searched inside the 'ci' section.
    ci_quantities = [
        Quantity(
            'electronic_structure_method',
            r'Correlation treatment\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'single_excitations_on_off',
            r'Single excitations\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'orbital_opt_on_off',
            r'Orbital optimization\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'z_vector_calc_on_off',
            r'Calculation of Z vector\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'Brueckner_orbitals_calc_on_off',
            r'Calculation of Brueckner orbitals\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'perturbative_triple_excitations_on_off',
            r'Perturbative triple excitations\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'f12_correction_on_off',
            r'Calculation of F12 correction\s*\.+\s*(\S+)', convert=False),
        Quantity(
            'frozen_core_treatment',
            r'Frozen core treatment\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'reference_wave_function',
            r'Reference Wavefunction\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'nb_of_atomic_orbitals',
            r'Number of AO\'s\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_of_electrons',
            r'Number of electrons\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_of_correlated_electrons',
            r'Number of correlated electrons\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'integral_transformation',
            r'Integral transformation\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'level_shift_amplitude_update',
            rf'Level shift for amplitude update\s*\.+\s*({re_float})', dtype=float),
        Quantity(
            'coulomb_transformation_type',
            r'Transformation type\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'coulomb_transformation_dimension_basis',
            r'Dimension of the basis\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_internal_alpha_mol_orbitals',
            r'Number of internal alpha\-MOs\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_internal_beta_mol_orbitals',
            r'Number of internal beta\-MOs\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'pair_cutoff',
            rf'Pair cutoff\s*\.+\s*({re_float})', dtype=float),
        Quantity(
            'atomic_orbital_integral_source',
            r'AO\-integral source\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'integral_package_used',
            r'Integral package used\s*\.+\s*(.+)', flatten=False, convert=False),
        Quantity(
            'nb_alpha_pairs_included',
            r'Number of Alpha\-MO pairs included\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'nb_beta_pairs_included',
            r'Number of Beta\-MO pairs included\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'mp2_energy_spin_aa',
            rf'EMP2\(aa\)=\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'mp2_energy_spin_bb',
            rf'EMP2\(bb\)=\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'mp2_energy_spin_ab',
            rf'EMP2\(ab\)=\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'mp2_initial_guess',
            rf'E\(0\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'mp2_energy',
            rf'E\(MP2\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'mp2_total_energy',
            rf'Initial E\(tot\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'T_and_T_energy',
            rf'<T\|T>\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'total_nb_pairs_included',
            r'Number of pairs included\s*\.+\s*(\d+)', dtype=int),
        Quantity(
            'iteration_energy',
            r'(Iter\s*E\(tot\)[\s\S]+?)\-{3}',
            str_operation=str_to_iteration_energy, convert=False),
        Quantity(
            'ccsd_correlation_energy',
            rf'E\(CORR\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsd_total_energy',
            rf'E\(TOT\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        # NOTE(review): the next two quantities carry a hartree unit although
        # their labels (a norm and a diagnostic) suggest plain numbers -
        # confirm against the consumers before changing.
        Quantity(
            'single_norm_half_ss',
            rf'Singles Norm <S\|S>\*\*1/2\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            't1_diagnostic',
            rf'T1 diagnostic\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsdt_total_triples_correction',
            rf'Triples Correction \(T\)\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsdt_aaa_triples_contribution',
            rf'alpha\-alpha\-alpha\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsdt_aab_triples_contribution',
            rf'alpha\-alpha\-beta\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        # typo in metainfo?
        Quantity(
            'ccsdt_aba_triples_contribution',
            rf'alpha\-beta\-beta\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsdt_bbb_triples_contribution',
            rf'beta\-beta\-beta\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsdt_final_corr_energy',
            rf'Final correlation energy\s*\.+\s*({re_float})',
            dtype=float, unit=ureg.hartree),
        Quantity(
            'ccsd_final_energy',
            rf'E\(CCSD\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree),
        Quantity(
            'energy_total',
            rf'E\(CCSD\(T\)\)\s*\.+\s*({re_float})', dtype=float, unit=ureg.hartree)]

    # Content of one calculation: used for a single point run, for every
    # optimization cycle and for the final energy evaluation.
    calculation_quantities = [
        Quantity(
            'cartesian_coordinates',
            r'CARTESIAN COORDINATES \(ANGSTROEM\)\s*\-+\s*([\s\S]+?)\n\n',
            str_operation=str_to_cartesian_coordinates),
        Quantity(
            'basis_set',
            r'\n *BASIS SET INFORMATION\s*\-+([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=basis_set_quantities)),
        Quantity(
            'auxiliary_basis_set',
            r'\n *AUXILIARY BASIS SET INFORMATION\s*\-+([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=basis_set_quantities)),
        Quantity(
            'basis_set_statistics',
            r'BASIS SET STATISTICS AND STARTUP INFO([\s\S]+?)\-{10}',
            sub_parser=TextParser(quantities=basis_set_statistics_quantities)),
        Quantity(
            'self_consistent',
            r'((?:ORCA SCF|DFT GRID GENERATION)\s*\-+[\s\S]+?(?:\-{70}|\Z))',
            sub_parser=TextParser(quantities=self_consistent_quantities)),
        Quantity(
            'tddft',
            r'ORCA TD\-DFT(?:/TDA)* CALCULATION\s*\-+\s*([\s\S]+?E\(tot\).*)',
            sub_parser=TextParser(quantities=tddft_quantities)),
        Quantity(
            'mp2',
            r'ORCA MP2 CALCULATION([\s\S]+?MP2 TOTAL ENERGY:.+)',
            sub_parser=TextParser(quantities=mp2_quantities)),
        Quantity(
            'ci',
            r'ORCA\-MATRIX DRIVEN CI([\s\S]+?E\(CCSD\(T\)\).*)',
            sub_parser=TextParser(quantities=ci_quantities)
        )]

    # Tolerances: the quantity name is '<label>_tol' with the label
    # lower-cased, blanks replaced by underscores and dots removed.
    geometry_optimization_quantities = [Quantity(
        '%s_tol' % key.lower().replace(' ', '_').replace('.', ''),
        rf'%s\s*(\w+)\s*\.+\s*({re_float})' % key, dtype=float) for key in [
            'Energy Change', 'Max. Gradient', 'RMS Gradient', 'Max. Displacement', 'RMS Displacement']]

    geometry_optimization_quantities += [
        Quantity(
            'update_method', r'Update method\s*(\w+)\s*\.+\s*(.+)'),
        Quantity(
            'coords_choice', r'Choice of coordinates\s*(\w+)\s*\.+\s*(.+)'),
        Quantity(
            'initial_hessian', r'Initial Hessian\s*(\w+)\s*\.+\s*(.+)')]

    geometry_optimization_quantities += [
        Quantity(
            'cycle',
            r'OPTIMIZATION CYCLE\s*\d+\s*\*\s*\*+([\s\S]+?)(?:\*\s*GEOMETRY|OPTIMIZATION RUN DONE|\Z)',
            repeats=True, sub_parser=TextParser(quantities=calculation_quantities)),
        Quantity(
            'final_energy_evaluation',
            r'FINAL ENERGY EVALUATION AT THE STATIONARY POINT([\s\S]+?FINAL SINGLE POINT ENERGY.*)',
            sub_parser=TextParser(quantities=calculation_quantities))]

    # Top-level quantities searched in the whole file.
    self._quantities = [
        Quantity(
            'program_version',
            r'Program Version\s*([\w_.].*)', convert=False, flatten=False),
        Quantity(
            'program_svn',
            r'\(SVN:\s*\$([^$]+)\$\)\s', convert=False, flatten=False),
        Quantity(
            'program_compilation_date',
            r'\(\$Date\:\s*(\w.+?)\s*\$\)', convert=False, flatten=False),
        Quantity(
            'input_file',
            r'INPUT FILE\s*\=+([\s\S]+?)END OF INPUT',
            sub_parser=TextParser(quantities=[
                Quantity('xc_functional', r'\d+>\s*!\s*(\S+)')])),
        Quantity(
            'single_point',
            r'\* Single Point Calculation \*\s*\*+([\s\S]+?(?:FINAL SINGLE POINT ENERGY.*|\Z))',
            sub_parser=TextParser(quantities=calculation_quantities)),
        Quantity(
            'geometry_optimization',
            r'\* Geometry Optimization Run \*\s*\*+([\s\S]+?(?:OPTIMIZATION RUN DONE|\Z))',
            sub_parser=TextParser(quantities=geometry_optimization_quantities))
    ]
def init_quantities(self):
    '''
    Define the quantities to be extracted and store them in
    ``self._quantities``.

    The top level yields the WIEN2k version, the start date and the repeated
    ``iteration`` blocks. The text of every iteration block is passed to a
    sub-parser which reads the values found behind the labelled entries
    (``NATO:``, ``POT:``, ``ENE:``, ...) matched by the patterns below.
    '''
    re_float = r'[\d\.Ee\-\+]+'

    def labelled_block(name, pattern, quantities, **kwargs):
        # A quantity whose matched text is parsed further by a sub-parser.
        return Quantity(
            name, pattern, sub_parser=TextParser(quantities=quantities),
            **kwargs)

    def to_rydberg_values(text):
        # Whitespace separated numbers -> floats multiplied by the rydberg unit.
        return [float(token) for token in text.strip().split()] * ureg.rydberg

    float_array = np.dtype(np.float64)

    # Entries read from a single iteration block.
    per_iteration = [
        labelled_block('NATO', r'(NATO\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'nr_of_independent_atoms', r'(\d+)\s*INDEPENDENT', dtype=int),
            Quantity(
                'total_atoms', r'(\d+)\s*TOTAL ATOMS IN UNITCELL', dtype=int),
            Quantity('system_name', r'SUBSTANCE:\s*(.+)', flatten=False),
        ]),
        labelled_block('POT', r'(POT\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'potential_option', r'POTENTIAL OPTION\s*(.+)',
                dtype=str, flatten=False),
        ]),
        labelled_block('LAT', r'(LAT\s*\:[\s\S]+?)\n *\:', [
            Quantity('lattice_const', r'LATTICE CONSTANTS=\s*([\d\. ]+)'),
        ]),
        labelled_block('VOL', r'(VOL\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'unit_cell_volume_bohr3',
                rf'UNIT CELL VOLUME\s*\=\s*({re_float})', dtype=np.float64),
            Quantity(
                'spinpolarization', r'((?:NON-)*SPINPOLARIZED) CALCULATION'),
        ]),
        labelled_block('RKM', r'(RKM\s*\:[\s\S]+?)\n *\:', [
            Quantity('matrix_size', r'MATRIX SIZE\s*(\d+)', dtype=int),
            Quantity('LOs', r'LOs:\s*(\d+)', dtype=int),
            Quantity('rkm', r'RKM\=\s*([\d\.]+)', dtype=np.float64),
        ]),
        labelled_block('KPT', r'(KPT\s*\:[\s\S]+?)\n *\:', [
            Quantity('nr_kpts', r'NUMBER OF K-POINTS:\s*(\d+)', dtype=int),
        ]),
        labelled_block('GAP', r'(GAP\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'ene_gap', rf'({re_float})\s*Ry',
                dtype=np.float64, unit=ureg.rydberg),
        ]),
        labelled_block('NOE', r'(NOE\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'noe', rf'NUMBER OF ELECTRONS\s*\=\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('FER', r'(FER\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'energy_reference_fermi_iteration',
                rf'F E R M I \- ENERGY.+?\=\s*([\d\.\-\+Ee ]+)',
                str_operation=to_rydberg_values, convert=False),
        ]),
        labelled_block('GMA', r'(GMA\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'cutoff', rf'POTENTIAL AND CHARGE CUT\-OFF\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('POSi', r'(POS\d+\:[\s\S]+?)\n *\:', [
            Quantity('atom_mult', r'MULT.*?\s*\=\s*(\d+)', dtype=int),
            Quantity(
                'position',
                rf'POSITION\s*\=\s*({re_float}\s*{re_float}\s*{re_float})',
                dtype=np.float64),
        ], repeats=True),
        labelled_block('CHAi', r'(CHA\d+\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_val_charge_cell',
                rf'TOTAL .+?CHARGE INSIDE.+?\=\s*({re_float})',
                dtype=np.float64),
        ], repeats=True),
        labelled_block('SUM', r'(SUM\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'energy_sum_eigenvalues_scf_iteration',
                rf'SUM OF EIGENVALUES\s*\=\s*({re_float})',
                dtype=np.float64, unit=ureg.rydberg),
        ]),
        Quantity(
            'RTOi',
            rf'RTO\d+\:\s*\d+\s*({re_float})\s*({re_float})\s*({re_float})\s*({re_float})\s*',
            dtype=float_array, repeats=True),
        labelled_block('NTO', r'(NTO\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_int_charge_nm', rf'CHARGE\s*\=\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('NTOi', r'(NTO\d+\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_charge_in_sphere_nm', rf'CHARGE.+\=\s*({re_float})',
                dtype=np.float64),
        ], repeats=True),
        labelled_block('DTOi', r'(DTO\d+\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_diff_charge',
                rf'TOTAL\s*DIFFERENCE CHARGE.+\=\s*({re_float})',
                dtype=np.float64),
        ], repeats=True),
        labelled_block('DIS', r'(DIS\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'charge_distance', rf'CHARGE DISTANCE.+\)\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('CTO', r'(CTO\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_int_charge', rf'CHARGE\s*\=\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('CTOi', r'(CTO\d+\:[\s\S]+?)\n *\:', [
            Quantity(
                'tot_charge_in_sphere',
                rf'TOTAL\s*CHARGE IN SPHERE.+\=\s*({re_float})',
                dtype=np.float64),
        ], repeats=True),
        Quantity(
            'NECi',
            rf'NEC\d+\:\s*NUCLEAR AND ELECTRONIC CHARGE\s*({re_float})\s*({re_float})',
            dtype=float_array, repeats=True),
        labelled_block('MMINT', r'(MMINT\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'mmint',
                rf'MAGNETIC MOMENT IN INTERSTITIAL\s*\=\s*({re_float})',
                dtype=np.float64),
        ]),
        labelled_block('MMIi', r'(MMI\d+\:[\s\S]+?)\n *\:', [
            Quantity(
                'mmi', rf'MAGNETIC MOMENT IN SPHERE\s*\=\s*({re_float})',
                dtype=np.float64),
        ], repeats=True),
        labelled_block('MMTOT', r'(MMTOT\s*\:[\s\S]+?)\n *\:', [
            Quantity(
                'mmtot', rf' MAGNETIC MOMENT IN CELL\s*\=\s*({re_float})',
                dtype=np.float64),
        ]),
        # Unlike the other entries, the ENE block ends at the first newline.
        labelled_block('ENE', r'(ENE\s*\:[\s\S]+?)\n', [
            Quantity(
                'energy_total_scf_iteration',
                rf'TOTAL ENERGY IN Ry\s*\=\s*({re_float})',
                dtype=np.float64, unit=ureg.rydberg),
        ]),
        Quantity(
            'FORi',
            rf'FOR\d+\:\s*\d+\.ATOM\s*({re_float}\s*{re_float}\s*{re_float}\s*{re_float})',
            repeats=True, dtype=float_array),
        Quantity(
            'FGLi',
            rf'FGL\d+\:\s*\d+\.ATOM\s*({re_float}\s*{re_float}\s*{re_float})',
            repeats=True, dtype=float_array),
    ]

    self._quantities = [
        Quantity(
            'version',
            r'LABEL\d+\:\s*using WIEN2k_(\S+) \(Release ([\d\/]+)\)',
            flatten=False),
        Quantity(
            'start_date',
            r'LABEL\d+\:\s*on .+ at \w+ (\w+ \d+ \d\d\:\d\d\:\d\d)\s*\w*\s*(\d+)',
            flatten=False),
        labelled_block(
            'iteration', r'\d+\:\s*(\d+\.\s* ITERATION[\s\S]+?)(?:\:ITE|\Z)',
            per_iteration, repeats=True),
    ]
def init_quantities(self):
    '''
    Define the quantities to be extracted from the Octopus output and store
    them in ``self._quantities``; also resets ``self._header`` to None.

    The nested ``str_to_*`` helpers are the ``str_operation`` callbacks that
    turn the matched text into python values.
    '''
    def str_to_option(val_in):
        '''
        Split a ``name : value`` line into ``[name, value]``.

        Only the first colon separates name and value, so colons that are
        part of the value (e.g. a time of day) are preserved. A line without
        a colon gives ``[name, '']``.
        '''
        name, _, value = val_in.strip().partition(':')
        return [name.strip(), value.strip()]

    def str_to_cell(val_in):
        '''
        Convert the matched lattice vector text into an array with units.

        The unit is angstrom if the text starts with 'a' (any case), bohr
        otherwise. The first line is dropped and the remaining lines are read
        as rows of floats.
        '''
        unit = ureg.angstrom if val_in.lower().startswith('a') else ureg.bohr
        rows = [line.split() for line in val_in.strip().split('\n')[1:]]
        return np.array(rows, dtype=float) * unit

    def str_to_spacing(val_in):
        '''
        Convert the matched spacing text into an array with units.

        The unit is angstrom if the text starts with 'A', bohr otherwise; the
        three tokens following the first one are the values.
        '''
        unit = ureg.angstrom if val_in.startswith('A') else ureg.bohr
        return np.array(val_in.split()[1:4], dtype=float) * unit

    def str_to_energy(val_in):
        '''
        Convert ``<value> <unit label>`` into a value with units: eV if the
        label starts with 'e', hartree otherwise.
        '''
        tokens = val_in.split()
        unit = ureg.eV if tokens[1].startswith('e') else ureg.hartree
        return float(tokens[0]) * unit

    def str_to_td_iteration(val_in):
        '''
        Convert one line of five whitespace separated columns into a dict
        with keys iter, time, energy, scfsteps and elapsed_time.
        '''
        tokens = val_in.strip().split()
        return dict(
            iter=int(tokens[0]), time=float(tokens[1]),
            energy=float(tokens[2]), scfsteps=int(tokens[3]),
            elapsed_time=float(tokens[4]))

    # Quantities read from a single SCF cycle block.
    iteration_quantities = [
        Quantity('energy_total', rf'etot\s*=\s*({re_float})'),
        # TODO scf_iteration eigenvalues are sometimes truncated and unusable
        Quantity(
            'fermi_level', rf'Fermi energy\s*=\s*({re_float} .*)',
            str_operation=str_to_energy, convert=False),
        Quantity(
            'time', r'Elapsed time for SCF step\s*\d+:\s*([\d\.]+)',
            unit='s', dtype=float)
    ]

    self._quantities = [
        Quantity(
            'header', r'Running octopus([\s\S]+?)\*{10}',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'options', r'\n *([\w ]+?\s*:\s*.*)',
                    str_operation=str_to_option, repeats=True),
            ])),
        Quantity(
            'grid', r'\*\s*Grid\s*\*+\s*([\s\S]+?)\*{10}',
            sub_parser=TextParser(quantities=[
                Quantity('boxshape', r'Type\s*=\s*(.*)'),
                Quantity(
                    'npbc',
                    r'Octopus will treat the system as periodic in (\S+) dim',
                    dtype=int),
                Quantity(
                    'cell', r'Lattice Vectors \[(.*)\]([-\d\s\.]+)',
                    str_operation=str_to_cell, convert=False),
                Quantity(
                    'spacing',
                    r'Spacing \[(.*)\] = \(\s*(\S+), (\S+), (\S+)\s*\)',
                    str_operation=str_to_spacing, convert=False)
            ])),
        Quantity(
            'theory_level', r'\*\s*Theory Level\s*\*+\s*([\s\S]+?)\*{10}',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'theory_level', r'\[TheoryLevel = (.+)\]', flatten=False),
                Quantity('exchange', r'Exchange\s+(.*) \(', flatten=False),
                Quantity(
                    'correlation', r'Correlation\s+(.*) \(', flatten=False)
            ])),
        Quantity(
            'self_consistent',
            r'Info: Starting SCF iteration\.\s*([\s\S]+?)Info: SCF',
            repeats=True, sub_parser=TextParser(quantities=[
                Quantity(
                    'iteration',
                    r'SCF CYCLE ITER #\s*(\d+\s*\*+[\s\S]+?)\*{10}',
                    repeats=True, sub_parser=TextParser(
                        quantities=iteration_quantities))
            ])),
        Quantity(
            'time_dependent',
            r'Time\-Dependent Simulation \*+([\s\S]+?)Info: Finished writing information',
            repeats=True, sub_parser=TextParser(quantities=[
                Quantity(
                    'iteration',
                    rf'\n *(\d+\s*{re_float}\s*{re_float}\s*\d+\s*{re_float}) *\n',
                    str_operation=str_to_td_iteration, repeats=True,
                    convert=False)
            ])),
        Quantity(
            'x_octopus_info_scf_converged_iterations',
            r'SCF converged in\s*(\d+) iterations', dtype=int),
        Quantity(
            'minimization',
            r'(MINIMIZATION ITER #:\s*\d+\s*\++\s*Energy[\s\S]+?\+{10})',
            repeats=True, sub_parser=TextParser(quantities=[
                Quantity(
                    'energy_total', rf'Energy\s*=\s*({re_float} .*)',
                    str_operation=str_to_energy, convert=False),
                Quantity('number', r'ITER #:\s*(\d+)', dtype=int)
            ]))
        # calculation results are not printed in outfile but in info
    ]

    self._header = None
def init_quantities(self):
    '''
    Define the quantities to be extracted from the GROMACS log and store
    them in ``self._quantities``.

    The nested ``str_to_*`` helpers are the ``str_operation`` callbacks that
    turn the matched text into python values.
    '''
    def str_to_header(val_in):
        '''
        Convert ``key: value`` lines into a dict; each line is split at its
        first colon and lines without a colon are ignored.
        '''
        pairs = [line.split(':', 1) for line in val_in.strip().split('\n')]
        return {pair[0].strip(): pair[1].strip() for pair in pairs if len(pair) == 2}

    def str_to_input_parameters(val_in):
        '''
        Convert the input parameter listing into a dict.

        ``name = value`` lines are stored as strings. ``name[i] = {a, b, ...}``
        lines are collected into a list per name, each entry being a float or
        a list of floats.
        '''
        re_array = re.compile(r'\s*([\w\-]+)\[[\d ]+\]\s*=\s*\{*(.+)')
        re_scalar = re.compile(r'\s*([\w\-]+)\s*=\s*(.+)')

        parameters = {}
        for line in val_in.strip().split('\n'):
            scalar = re_scalar.match(line)
            if scalar:
                parameters[scalar.group(1)] = scalar.group(2)
                continue
            array = re_array.match(line)
            if array:
                value = [
                    float(v) for v in array.group(2).rstrip('}').split(',')]
                parameters.setdefault(array.group(1), []).append(
                    value[0] if len(value) == 1 else value)
        return parameters

    def str_to_energies(val_in):
        '''
        Convert alternating lines of column names and column values into a
        dict of values with units.

        'Temperature' is stored in kelvin, keys starting with 'Pres' in bar
        (with a trailing ' (bar)' removed from the key) and everything else
        as value / MOL in kJ.
        '''
        # Column names may contain single spaces (see the ' (bar)' suffix
        # below), so names are separated by a run of two spaces. The ' P'
        # alternative separates a name from a following one that starts with
        # 'P' after a single space; it consumes that 'P', which is put back
        # for names then starting with 'res'.
        energy_keys_re = re.compile(r'(.+?)(?: {2}|\Z| P)')
        bar_suffix = ' (bar)'

        keys = []
        energies = {}
        for line in val_in.strip().split('\n'):
            line = line.strip()
            if not line:
                # whitespace-only line, nothing to read
                continue
            if line[0].isalpha():
                keys = [k.strip() for k in energy_keys_re.findall(line)]
                keys = [
                    'P%s' % k if k.startswith('res') else k for k in keys if k]
                continue
            values = line.split()
            for n, key in enumerate(keys):
                if key == 'Temperature':
                    energies[key] = float(values[n]) * ureg.kelvin
                elif key.startswith('Pres'):
                    # remove the exact suffix; str.rstrip would treat it as a
                    # set of characters
                    if key.endswith(bar_suffix):
                        key = key[:-len(bar_suffix)]
                    energies[key] = float(values[n]) * ureg.bar
                else:
                    energies[key] = float(values[n]) / MOL * ureg.kJ
        return energies

    def str_to_step_info(val_in):
        '''
        Convert a line of column names followed by a line of numbers into a
        dict mapping each name to its float value.
        '''
        lines = val_in.strip().split('\n')
        keys = lines[0].split()
        values = [float(v) for v in lines[1].split()]
        return {key: values[n] for n, key in enumerate(keys)}

    # Shared by the per-step blocks and the averages block.
    thermo_quantities = [
        Quantity(
            'energies', r'Energies \(kJ/mol\)\s*([\s\S]+?)\n\n',
            str_operation=str_to_energies, convert=False),
        Quantity(
            'step_info', r'(Step.+\n[\d\.\- ]+)',
            str_operation=str_to_step_info, convert=False)
    ]

    self._quantities = [
        Quantity('time_start', r'Log file opened on (.+)', flatten=False),
        Quantity(
            'host_info',
            r'Host:\s*(\S+)\s*pid:\s*(\d+)\s*rank ID:\s*(\d+)\s*number of ranks:\s*(\d*)'
        ),
        Quantity(
            'module_version', r'GROMACS:\s*(.+?),\s*VERSION\s*(\S+)',
            flatten=False),
        Quantity('execution_path', r'Executable:\s*(.+)'),
        Quantity('working_path', r'Data prefix:\s*(.+)'),
        # TODO cannot understand treatment of the command line in the old parser
        Quantity(
            'header', r'(GROMACS version:[\s\S]+?)\n\n',
            str_operation=str_to_header),
        Quantity(
            'input_parameters', r'Input Parameters:\s*([\s\S]+?)\n\n',
            str_operation=str_to_input_parameters),
        Quantity(
            'step', r'(Step\s*Time[\s\S]+?Energies[\s\S]+?\n\n)',
            repeats=True, sub_parser=TextParser(quantities=thermo_quantities)),
        # any number of spaces is accepted in front of the arrow
        Quantity(
            'averages', r'A V E R A G E S +====>([\s\S]+?\n\n\n)',
            sub_parser=TextParser(quantities=thermo_quantities)),
        Quantity(
            'time_end', r'Finished \S+ on rank \d+ (.+)', flatten=False)
    ]
from . import metainfo  # pylint: disable=unused-import

''' This is a hello world style example for an example parser/converter. '''


def str_to_sites(string):
    '''
    Split a ``<symbol>(<x>, <y>, <z>)`` string into the symbol and an array
    with the first three comma separated numbers found inside the brackets.
    '''
    symbol, bracket_content = string.split('(')
    components = bracket_content.split(')')[0].split(',')
    return symbol, np.array(components[:3], dtype=float)


# Quantities read from the text of a single calculation block.
calculation_parser = UnstructuredTextFileParser(quantities=[
    Quantity(
        'sites', r'([A-Z]\([\d\.\, \-]+\))', str_operation=str_to_sites),
    Quantity(
        System.lattice_vectors,
        r'(?:latice|cell): \((\d)\, (\d), (\d)\)\,?\s*\((\d)\, (\d), (\d)\)\,?\s*\((\d)\, (\d), (\d)\)\,?\s*',
        repeats=False),
    Quantity('energy', r'energy: (\d\.\d+)'),
    Quantity(
        'magic_source',
        r'done with magic source\s*\*{3}\s*\*{3}\s*[^\d]*(\d+)',
        repeats=False),
])

# Quantities read from the whole file; every 'calculation' match is handed to
# calculation_parser.
mainfile_parser = UnstructuredTextFileParser(quantities=[
    Quantity('date', r'(\d\d\d\d\/\d\d\/\d\d)', repeats=False),
    Quantity('program_version', r'super\_code\s*v(\d+)\s*', repeats=False),
    Quantity(
        'calculation',
        r'\s*system \d+([\s\S]+?energy: [\d\.]+)([\s\S]+\*\*\*)*',
        sub_parser=calculation_parser, repeats=True),
])
def init_quantities(self):
    '''
    Define the quantities to be extracted from the NWChem output and store
    them in ``self._quantities``.

    Quantity lists are built bottom-up: geometry quantities are reused by the
    DFT and plane-wave (PSPW) lists, which are in turn reused for single
    point, geometry optimization and molecular dynamics sections.
    '''
    re_float = r'[\d\.\-\+ED]+'

    def str_to_functional(val_in):
        '''
        Split a functional line into ``[name, *trailing columns]``.

        The name may contain single spaces, so it is separated from the
        trailing columns at the last run of two spaces; the trailing columns
        are then split on whitespace. A line without such a run is returned
        as a one element list.
        '''
        parts = [v.strip() for v in val_in.strip().rsplit(' ' * 2, 1)]
        if len(parts) == 2:
            parts = [parts[0]] + parts[1].split()
        return parts

    def str_to_energy(val_in):
        '''
        Convert ``<name> = <value>`` or ``<name> : <value>`` into
        ``(name, value * hartree)``.
        '''
        separator = '=' if '=' in val_in else ':'
        parts = [v.strip() for v in val_in.strip().split(separator)]
        # re_float also admits a 'D' exponent marker, which float() rejects
        return parts[0], float(parts[1].replace('D', 'E')) * ureg.hartree

    def str_to_labels_positions(val_in):
        '''
        Convert table rows into ``(labels, positions * angstrom)``; the label
        is the second column and the position the fourth to sixth column.
        '''
        rows = [line.strip().split() for line in val_in.strip().split('\n')]
        labels = [row[1] for row in rows]
        positions = np.array(
            [row[3:6] for row in rows], dtype=np.dtype(np.float64))
        return labels, positions * ureg.angstrom

    def str_to_labels_positions_forces(val_in):
        '''
        Convert table rows into ``(labels, positions * bohr,
        forces * hartree / bohr)``; the label is the first column, the
        position the third to fifth and the force the sixth to eighth column.
        '''
        columns = np.transpose(
            [line.split() for line in val_in.strip().split('\n')])
        labels = columns[0]
        positions = np.transpose(
            np.array(columns[2:5], dtype=np.dtype(np.float64)))
        forces = np.transpose(
            np.array(columns[5:8], dtype=np.dtype(np.float64)))
        return labels, positions * ureg.bohr, forces * ureg.hartree / ureg.bohr

    def split_rows(separator):
        # str_operation splitting every line at the separator into stripped cells.
        def operation(val_in):
            return [
                [cell.strip() for cell in row.split(separator)]
                for row in val_in.split('\n')]
        return operation

    geometry_quantities = [
        Quantity(
            'labels_positions',
            r'No\.\s*Tag\s*Charge\s*X\s*Y\s*Z[\s\-]+([\s\S]+?)\n *\n',
            str_operation=str_to_labels_positions, convert=False),
        Quantity(
            'lattice_vectors',
            r'a1\=\<([\d\.\- ]+)\>\s*'
            r'a2\=\<([\d\.\- ]+)\>\s*'
            r'a3\=\<([\d\.\- ]+)\>\s*',
            dtype=np.dtype(np.float64), shape=(3, 3), unit=ureg.bohr)
    ]

    dft_quantities = geometry_quantities + [
        Quantity(
            'general_info', r'General Information\s+\-+([\s\S]+?)\n *\n',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'info', r' +([\w\. ]+\s*\:\s*[\w\.\- ]+)',
                    str_operation=lambda x: [v.strip() for v in x.split(':')],
                    repeats=True)
            ])),
        Quantity(
            'xc_info', r'XC Information\s+\-+([\s\S]+?)\n *\n',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'functional',
                    r'(.+(?:Functional|Exchange|POtential).+)',
                    str_operation=str_to_functional, repeats=True)
            ])),
        Quantity(
            'energy', rf'([\w \-\.]+ energy\s*=\s*{re_float})',
            str_operation=str_to_energy, repeats=True, convert=False),
        Quantity(
            'labels_positions_forces',
            r'atom\s*coordinates\s*gradient\s*x\s*y\s*z\s*x\s*y\s*z\s*([\s\S]+?\n *\n)',
            str_operation=str_to_labels_positions_forces, convert=False),
        Quantity(
            'qmd_info', r'QMD Run Information\s*\-+\s*([\s\S]+?)\n *\n',
            str_operation=split_rows(':'), convert=False),
        Quantity(
            'self_consistency',
            r'convergence\s*iter\s*energy\s*DeltaE\s*RMS\-Dens\s*Diis\-err\s*time\s*'
            r'([\s\S]+?)\n *\n *\n',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'iteration',
                    rf'd\=\s*\d+,ls=[\d\.]+,diis\s*\d+\s*({re_float})\s*({re_float})\s*{re_float}\s*{re_float}\s*({re_float})',
                    dtype=np.dtype(np.float64), repeats=True)
            ])),
    ]

    dft_gradient_quantities = [
        Quantity(
            'labels_positions_forces',
            r'atom\s*coordinates\s*gradient\s*x\s*y\s*z\s*x\s*y\s*z\s*([\s\S]+?\n *\n)',
            str_operation=str_to_labels_positions_forces, convert=False),
        Quantity('energy', r'\@ Step\s*Energy.+\s*\@[\-\s]+\@\s*(.+)'),
    ]

    pw_quantities = geometry_quantities + [
        Quantity(
            'energy', rf'([\w \-]+ energy\s*\:\s*{re_float})',
            str_operation=str_to_energy, repeats=True, convert=False),
        Quantity(
            'spin_S2', rf'\<S\^2\>\s*\=\s*({re_float})', dtype=np.float64),
        Quantity(
            'parameters', r'options\:\s*([\s\S]+?)\n *\n',
            str_operation=split_rows('=')),
        Quantity(
            'self_consistency',
            r'ITERATION STARTED([\s\S]+?)ITERATION ENDED',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'iteration',
                    rf'\d+\s+({re_float}\s+{re_float})\s+{re_float}\n',
                    repeats=True, dtype=np.dtype(np.float64))
            ])),
        Quantity(
            'total_charge', rf'total charge\:\s*({re_float})',
            dtype=np.float64)
        # TODO add xc functionals cannot find mapping
    ]

    calculation_quantities = geometry_quantities + [
        Quantity(
            'dft', r'(DFT Module[\s\S]+?)(?:NWChem|\Z)', repeats=True,
            sub_parser=TextParser(quantities=dft_quantities)),
        Quantity(
            'dft_gradient', r'(DFT Gradient Module[\s\S]+?)(?:NWChem|\Z)',
            repeats=True,
            sub_parser=TextParser(quantities=dft_gradient_quantities)),
        Quantity(
            'pw', r'(PSPW Calculation[\s\S]+?)(?:\*\s*NWPW|\Z)', repeats=True,
            sub_parser=TextParser(quantities=pw_quantities))
    ]

    self._quantities = [
        Quantity(
            'version',
            r'Northwest Computational Chemistry Package \(NWChem\) (.+)\n',
            flatten=False, convert=False),
        Quantity(
            'job_info', r'Job information\s*\-+\s*([\s\S]+?)\n *\n *\n',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'info', r'(\w.+?\s*\=\s*.+)',
                    str_operation=lambda x: [v.strip() for v in x.split('=')],
                    convert=False, repeats=True)
            ])),
        Quantity(
            'input', r'(Input Module[\s\S]+? {30}NWChem)',
            sub_parser=TextParser(quantities=geometry_quantities)),
        Quantity(
            'single_point',
            r'(DFT Module\s+\-+[\s\S]+?)(?:GA Statistics for process|NWChem Input Module|\Z)',
            sub_parser=TextParser(quantities=calculation_quantities)),
        Quantity(
            'geometry_optimization',
            r'(Geometry Optimization[\s\S]+?)(?:GA Statistics for process|NWChem Input Module|\Z)',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'parameters',
                    r'(maximum gradient threshold\s*\(gmax\)[\s\S]+?)\n *\n',
                    str_operation=split_rows('=')),
                Quantity(
                    'iteration',
                    r'p\s+\d+\s*\-+\s*(Geometry[\s\S]+?(?:\-+\s*Ste|\Z))',
                    repeats=True, sub_parser=TextParser(
                        quantities=calculation_quantities))
            ])),
        Quantity(
            'molecular_dynamics',
            r'(QMD Module[\s\S]+?)(?:GA Statistics for process|NWChem Input Module|\Z)',
            sub_parser=TextParser(quantities=[
                Quantity(
                    'parameters',
                    r'QMD Run Parameters\s*\-+\s*([\s\S]+?)\n *\n',
                    str_operation=split_rows(':')),
                Quantity(
                    'iteration', r'(DFT Module[\s\S]+?(?:NWChem|\Z))',
                    repeats=True, sub_parser=TextParser(
                        quantities=calculation_quantities))
            ])),
        Quantity(
            'pw',
            r'(PSPW Calculation[\s\S]+?)(?:\*\s*NWPW|GA Statistics for process|\Z)',
            repeats=True, sub_parser=TextParser(quantities=pw_quantities))
        # TODO implement frequency analysis
        # TODO add timings
    ]
from nomad.datamodel.metainfo.public import section_single_configuration_calculation as SCC from nomad.datamodel.metainfo.public import section_method as Method from nomad.datamodel.metainfo.public import section_sampling_method from nomad.datamodel.metainfo.public import section_XC_functionals as xc_functionals from nomad.datamodel.metainfo.public import section_eigenvalues from nomad.parsing.file_parser import UnstructuredTextFileParser, Quantity from .metainfo.openmx import OpenmxSCC # pylint: disable=unused-import ''' This is parser for OpenMX DFT code. ''' A = (1 * units.angstrom).to_base_units().magnitude scf_step_parser = UnstructuredTextFileParser(quantities=[ Quantity('NormRD', r'NormRD=\s*([\d\.]+)', repeats=False), Quantity('Uele', r'Uele=\s*([-\d\.]+)', repeats=False) ]) md_step_parser = UnstructuredTextFileParser(quantities=[ Quantity('SCF', r' (SCF=.+?Uele=\s*[-\d\.]+)', sub_parser=scf_step_parser, repeats=True), Quantity('Utot', r'Utot\.\s+(-?\d+\.\d+)', repeats=False) ]) species_and_coordinates_parser = UnstructuredTextFileParser(quantities=[ Quantity( 'atom', r'\s*\d+\s*([A-Za-z]{1,2})\s*([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+[\d\.]+\s*[\d\.]+\s*',