def __init__(self):
    """Load every Magpie elemental property table into memory.

    Looks for ``*.table`` files in
    ``<module_dir>/data_files/magpie_elementdata`` and fills
    ``self.all_elemental_props`` as
    ``{property name: {element symbol: value}}``.  Line ``Z`` of a table is
    used for the element with atomic number ``Z``.  Rows of the
    "OxidationStates" table are parsed into a list of floats, rows of every
    other table into a single float; a row that is missing or cannot be
    parsed is stored as NaN.
    """
    self.all_elemental_props = {}
    self.data_dir = os.path.join(module_dir, "data_files",
                                 'magpie_elementdata')

    # The property name is the table's file name without ".table".
    table_names = [
        os.path.basename(path).replace('.table', '')
        for path in glob(os.path.join(self.data_dir, "*.table"))
    ]

    for name in table_names:
        table_path = os.path.join(self.data_dir, '{}.table'.format(name))
        with open(table_path, 'r') as f:
            values = self.all_elemental_props[name] = {}
            rows = f.readlines()
            multi_valued = name in ["OxidationStates"]
            for z in range(1, len(_pt_data) + 1):
                try:
                    row = rows[z - 1]
                    if multi_valued:
                        value = [float(tok) for tok in row.split()]
                    else:
                        value = float(row)
                except (ValueError, IndexError):
                    # Short table or unparsable entry.
                    value = float("NaN")
                values[Element.from_Z(z).symbol] = value
def cell_to_structure(
        cell: Tuple[List[List[float]], List[List[float]], List[int]]
) -> Structure:
    """Build a Structure from a (lattice, positions, numbers) cell tuple.

    Args:
        cell: Three-item tuple holding, in order,
            the lattice parameters
            [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]],
            the fractional atomic coordinates in an Nx3 array, and
            the Z numbers of the species in a length N array.

    Returns:
        Structure constructed from the lattice, the coordinates and the
        elements looked up from the Z numbers.
    """
    species = list(map(Element.from_Z, cell[2]))
    lattice, frac_coords = cell[0], cell[1]
    return Structure(lattice=lattice, coords=frac_coords, species=species)
def find_seekpath_data(self) -> None:
    """Query seekpath for the band path and build the primitive cell.

    The result of ``seekpath.get_explicit_k_path`` is kept in
    ``self._seekpath_data``; its ``primitive_lattice``, ``primitive_types``
    and ``primitive_positions`` entries are turned into the Structure stored
    in ``self._band_primitive``.
    """
    path_settings = dict(structure=self.cell,
                         symprec=self.symprec,
                         angle_tolerance=self.angle_tolerance,
                         with_time_reversal=self.time_reversal,
                         reference_distance=self.ref_distance)
    self._seekpath_data = seekpath.get_explicit_k_path(**path_settings)

    data = self._seekpath_data
    lattice = data["primitive_lattice"]
    # primitive_types holds atomic numbers; convert them to Elements.
    species = list(map(Element.from_Z, data["primitive_types"]))
    positions = data["primitive_positions"]
    self._band_primitive = Structure(lattice, species, positions)
def __init__(self):
    """Load the MEGNet elemental embeddings bundled with the module.

    Reads ``data_files/megnet_elemental_embedding.json`` under
    ``module_dir`` and fills ``self.all_element_data`` with one
    ``{"embedding 1": ..., ..., "embedding 16": ...}`` dict per entry.
    Entry 0 is stored under the dummy key ``"Dummy"``; entries 1 to 94 are
    stored under the symbol of the element with that atomic number.
    """
    dfile = os.path.join(module_dir,
                         "data_files/megnet_elemental_embedding.json")
    self._dummy = "Dummy"
    with open(dfile, "r") as fp:
        embeddings = json.load(fp)

    self.prop_names = ["embedding {}".format(k) for k in range(1, 17)]
    self.all_element_data = {}
    for z in range(95):
        row = dict(zip(self.prop_names, embeddings[z]))
        # Index 0 has no element; it is kept under the dummy key.
        key = self._dummy if z == 0 else str(Element.from_Z(z))
        self.all_element_data[key] = row
def __init__(self, prop_name: Union[str, List[str]],
             func: Callable = None, search_tp: str = "name", **kwargs):
    """
    Args:
        prop_name:(str,list of str)
            prop name or list of prop name
        func:(callable or list of callable)
            please make sure the size of it is the same with prop_name.
            A single callable is reused for every prop name, and a None
            entry falls back to ``process_uni``.
        search_tp:(str)
            location method.
            "name" for dict
            "number" for int.
    """
    super(AtomPymatgenPropMap, self).__init__(search_tp=search_tp, **kwargs)

    # Normalise prop_name to a list.
    if not isinstance(prop_name, (list, tuple)):
        prop_name = [prop_name]
    self.prop_name = list(prop_name)
    n_props = len(self.prop_name)

    # Normalise func to a list with one entry per prop name.
    if func is None:
        func = [process_uni] * n_props
    if not isinstance(func, (list, tuple)):
        func = [func]
    self.func = list(func)
    if n_props > 1 and len(self.func) == 1:
        self.func = self.func * n_props

    assert len(self.prop_name) == len(
        self.func), "The size of prop and func should be same."

    self.func = [process_uni if f is None else f for f in self.func]

    # Props with a registered after-treatment always use that function.
    for pos, name in enumerate(self.prop_name):
        if name in after_treatment_func_map_ele:
            self.func[pos] = after_treatment_func_map_ele[name]

    # Leading None so that self.da[Z] is the element with atomic number Z.
    self.da = [None] + [Element.from_Z(z) for z in range(1, 119)]
    self.ele_map = []
def get_atom_feature(
        self, mol, atom  # type: ignore
) -> Dict:  # type: ignore
    """
    Collect the features of a single atom into a dictionary

    Args:
        mol (pybel.Molecule): Molecule being evaluated
        atom (pybel.Atom): Specific atom being evaluated
    Return:
        (dict): All features for that atom. "chirality" and "ring_sizes"
            are only present when listed in ``self.atom_features``.
    """
    # OpenBabel object behind the pybel atom
    ob_atom = atom.OBAtom
    # pybel atom indices start from 1; chiral centres are looked up 0-based
    zero_based_idx = atom.idx - 1

    symbol = Element.from_Z(ob_atom.GetAtomicNum()).symbol

    # Cheap properties read straight from OpenBabel
    features = {}
    features["element"] = symbol
    features["atomic_num"] = ob_atom.GetAtomicNum()
    features["formal_charge"] = ob_atom.GetFormalCharge()
    # Hydrogen gets the fixed hybridization code 6
    features["hybridization"] = 6 if symbol == "H" else ob_atom.GetHyb()
    features["acceptor"] = ob_atom.IsHbondAcceptor()
    if atom.type == "H":
        features["donor"] = ob_atom.IsHbondDonorH()
    else:
        features["donor"] = ob_atom.IsHbondDonor()
    features["aromatic"] = ob_atom.IsAromatic()
    features["coordid"] = atom.coordidx

    if "chirality" in self.atom_features:
        centers = self._get_chiral_centers(mol)
        if zero_based_idx in centers:
            # 1 --> 'R', 2 --> 'S'
            features["chirality"] = 1 if centers[zero_based_idx] == "R" else 2
        else:
            features["chirality"] = 0

    if "ring_sizes" in self.atom_features:
        # OpenBabel caches ring computation internally, no need to cache
        # ourselves
        sssr = mol.OBMol.GetSSSR()
        features["ring_sizes"] = [
            ring.Size() for ring in sssr if ring.IsInRing(atom.idx)
        ]

    return features
def _parse(self, filename):
    """Parse a Gaussian output file and store the results on ``self``.

    The file is scanned line by line in three stages:

    * stage 0 collects Link0 (``%key=value``) lines and the route line,
    * stage 1 waits for the charge / multiplicity line,
    * stage 2 extracts structures, energies, thermal corrections, forces,
      orbital eigenvalues, MO coefficients, frequencies, Mulliken charges,
      PCM data, errors and the termination status.

    A warning is emitted when no normal termination line was found.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.
    """
    # Raw strings keep the regex escapes out of Python's string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    num_elec_patt = re.compile(
        r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")
    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")
    mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
    mo_coeff_name_patt = re.compile(
        r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

    # Maps the text matched by error_patt to the stored error label.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []
    self.eigenvalues = []
    self.is_spin = False

    coord_txt = []
    read_coord = 0
    read_mulliken = False
    read_eigen = False
    eigen_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []
    read_mo = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts the dashed separator lines of
                        # the table; wrapping back to 0 ends the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            rows = [row.split() for row in coord_txt[2:]]
                            sp = [Element.from_Z(int(toks[1]))
                                  for toks in rows]
                            coords = [[float(x) for x in toks[3:6]]
                                      for toks in rows]
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                # read molecular orbital eigenvalues
                if read_eigen:
                    m = orbital_patt.search(line)
                    if m:
                        eigen_txt.append(line)
                    else:
                        read_eigen = False
                        self.eigenvalues = {Spin.up: []}
                        for eigenline in eigen_txt:
                            if "Alpha" in eigenline:
                                self.eigenvalues[Spin.up] += [
                                    float(e)
                                    for e in float_patt.findall(eigenline)
                                ]
                            elif "Beta" in eigenline:
                                if Spin.down not in self.eigenvalues:
                                    self.eigenvalues[Spin.down] = []
                                self.eigenvalues[Spin.down] += [
                                    float(e)
                                    for e in float_patt.findall(eigenline)
                                ]
                        eigen_txt = []

                # read molecular orbital coefficients
                if read_mo:
                    # build a matrix with all coefficients
                    all_spin = [Spin.up]
                    if self.is_spin:
                        all_spin.append(Spin.down)

                    mat_mo = {}
                    for spin in all_spin:
                        mat_mo[spin] = np.zeros(
                            (self.num_basis_func, self.num_basis_func))
                        nMO = 0
                        end_mo = False
                        while nMO < self.num_basis_func and not end_mo:
                            # skip the two header lines of this block
                            f.readline()
                            f.readline()
                            self.atom_basis_labels = []
                            for i in range(self.num_basis_func):
                                line = f.readline()

                                # identify atom and OA labels
                                m = mo_coeff_name_patt.search(line)
                                if m.group(1).strip() != "":
                                    iat = int(m.group(2)) - 1
                                    self.atom_basis_labels.append(
                                        [m.group(4)])
                                else:
                                    self.atom_basis_labels[iat].append(
                                        m.group(4))

                                # MO coefficients
                                coeffs = [
                                    float(c)
                                    for c in float_patt.findall(line)
                                ]
                                for j, coeff in enumerate(coeffs):
                                    mat_mo[spin][i, nMO + j] = coeff

                            nMO += len(coeffs)
                            line = f.readline()
                            # manage pop=regular case (not all MO)
                            if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or
                                     mo_coeff_patt.search(line)):
                                end_mo = True
                                warnings.warn(
                                    "POP=regular case, matrix coefficients not complete"
                                )
                        f.readline()

                    self.eigenvectors = mat_mo
                    read_mo = False

                    # build a more convenient array dict with MO
                    # coefficient of each atom in each MO.
                    # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                    mo = {}
                    for spin in all_spin:
                        mo[spin] = [[
                            {} for iat in range(len(self.atom_basis_labels))
                        ] for j in range(self.num_basis_func)]
                        for j in range(self.num_basis_func):
                            i = 0
                            for iat in range(len(self.atom_basis_labels)):
                                for label in self.atom_basis_labels[iat]:
                                    mo[spin][j][iat][
                                        label] = self.eigenvectors[spin][i, j]
                                    i += 1

                    self.molecular_orbital = mo

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [
                            float(_v) for _v in m.groups()[0].split()
                        ]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        n = int(len(values) / 3)
                        # The columns belong to the last n frequencies.
                        for i in range(0, len(values), 3):
                            j = -n + int(i / 3)
                            frequencies[j][1].extend(values[i:i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    # NOTE(review): block nesting was reconstructed;
                    # confirm `terminated` is only set on normal termination.
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif num_elec_patt.search(line):
                    m = num_elec_patt.search(line)
                    self.electrons = (int(m.group(1)), int(m.group(2)))
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are not understood by float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no matcher(); use search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif not read_eigen and orbital_patt.search(line):
                    eigen_txt.append(line)
                    read_eigen = True
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True
                elif mo_coeff_patt.search(line):
                    if "Alpha" in line:
                        self.is_spin = True
                    read_mo = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))
                                ]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
def _parse(self, filename):
    """Parse a Gaussian output file and store the results on ``self``.

    The file is scanned line by line in three stages: stage 0 reads the
    route line, stage 1 waits for the charge / multiplicity line and
    stage 2 extracts structures, energies, thermal corrections, PCM data
    and the termination status.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.

    Raises:
        IOError: If no termination line was found in the file.
    """
    # Raw strings keep the regex escapes out of Python's string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    functional_basis_patt = re.compile(r"(\w+)/([^/]+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination of Gaussian")
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None

    coord_txt = []
    read_coord = 0
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif route_patt.search(line):
                    # Route keywords are stored upper-cased as KEY -> VALUE
                    # ("" when the keyword has no "=value" part).
                    self.route = {}
                    for tok in line.split():
                        sub_tok = tok.strip().split("=")
                        key = sub_tok[0].upper()
                        self.route[key] = \
                            sub_tok[1].upper() if len(sub_tok) > 1 else ""
                        m = functional_basis_patt.match(key)
                        if m:
                            self.functional = m.group(1)
                            self.basis_set = m.group(2)
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts the dashed separator lines of
                        # the table; wrapping back to 0 ends the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            rows = [row.split() for row in coord_txt[2:]]
                            sp = [Element.from_Z(int(toks[1]))
                                  for toks in rows]
                            # Real lists, not lazy map objects (Python 3).
                            coords = [[float(x) for x in toks[3:6]]
                                      for toks in rows]
                            self.structures.append(Molecule(sp, coords))
                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                    # NOTE(review): block nesting was reconstructed;
                    # confirm `terminated` is set for "Error" lines too.
                    terminated = True
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are not understood by float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no matcher(); use search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)

    if not terminated:
        raise IOError("Bad Gaussian output file.")
def Bk_symbol():
    """Return a one-item list with the string form of element Z = 97."""
    berkelium = Element.from_Z(97)
    return [str(berkelium)]
def max_z_Cm_symbol():
    """Return a one-item list with the string form of element Z = 96."""
    curium = Element.from_Z(96)
    return [str(curium)]
def _parse(self, filename):
    """Parse a Gaussian output file and store the results on ``self``.

    The file is scanned line by line in three stages:

    * stage 0 collects Link0 (``%key=value``) lines and the route line,
    * stage 1 waits for the charge / multiplicity line,
    * stage 2 extracts structures, energies, thermal corrections, forces,
      frequencies, Mulliken charges, PCM data, errors and the termination
      status.

    A warning is emitted when no normal termination line was found.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.
    """
    # Raw strings keep the regex escapes out of Python's string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(
        r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")
    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

    # Maps the text matched by error_patt to the stored error label.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []

    coord_txt = []
    read_coord = 0
    read_mulliken = False
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts the dashed separator lines of
                        # the table; wrapping back to 0 ends the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            rows = [row.split() for row in coord_txt[2:]]
                            sp = [Element.from_Z(int(toks[1]))
                                  for toks in rows]
                            coords = [[float(x) for x in toks[3:6]]
                                      for toks in rows]
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [
                            float(_v) for _v in m.groups()[0].split()
                        ]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        n = int(len(values) / 3)
                        # The columns belong to the last n frequencies.
                        for i in range(0, len(values), 3):
                            j = -n + int(i / 3)
                            frequencies[j][1].extend(values[i:i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    # NOTE(review): block nesting was reconstructed;
                    # confirm `terminated` is only set on normal termination.
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are not understood by float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no matcher(); use search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))
                                ]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
def _parse(self, filename):
    """Parse a Gaussian output file and store the results on ``self``.

    The file is scanned line by line in three stages: stage 0 reads the
    route line, stage 1 waits for the charge / multiplicity line and
    stage 2 extracts structures, energies, thermal corrections, Mulliken
    charges, PCM data, errors and the termination status.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.

    Raises:
        IOError: If no termination line was found in the file.
    """
    # Raw strings keep the regex escapes out of Python's string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    functional_basis_patt = re.compile(r"(\w+)/([^/]+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(r"^\s*Mulliken atomic charges")
    mulliken_charge_patt = re.compile(r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")

    # Maps the text matched by error_patt to the stored error label.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}

    coord_txt = []
    read_coord = 0
    read_mulliken = 0
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif route_patt.search(line):
                    # Route keywords are stored upper-cased as KEY -> VALUE
                    # ("" when the keyword has no "=value" part).
                    self.route = {}
                    for tok in line.split():
                        sub_tok = tok.strip().split("=")
                        key = sub_tok[0].upper()
                        self.route[key] = sub_tok[1].upper() \
                            if len(sub_tok) > 1 else ""
                        m = functional_basis_patt.match(key)
                        if m:
                            self.functional = m.group(1)
                            self.basis_set = m.group(2)
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        # A separate loop variable keeps `line` intact for
                        # the checks further down in this iteration.
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))
                                ]
                        read_mulliken = 0
                        self.Mulliken_charges = mulliken_charges

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts the dashed separator lines of
                        # the table; wrapping back to 0 ends the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            rows = [row.split() for row in coord_txt[2:]]
                            sp = [Element.from_Z(int(toks[1]))
                                  for toks in rows]
                            coords = [[float(x) for x in toks[3:6]]
                                      for toks in rows]
                            self.structures.append(Molecule(sp, coords))
                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                    # NOTE(review): block nesting was reconstructed;
                    # confirm `terminated` is set for "Error" lines too.
                    terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are not understood by float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no matcher(); use search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = 1

    if not terminated:
        raise IOError("Bad Gaussian output file.")
def _parse(self, filename):
    """Parse a Gaussian output file and store the results on ``self``.

    The file is scanned line by line in three stages:

    * stage 0 collects Link0 (``%key=value``) lines and the route line,
    * stage 1 waits for the charge / multiplicity line,
    * stage 2 extracts structures, energies, thermal corrections, forces,
      frequencies, Mulliken charges, PCM data, errors and the termination
      status.

    A warning is emitted when no normal termination line was found.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.
    """
    # Raw strings keep the regex escapes out of Python's string escapes.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")
    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

    # Maps the text matched by error_patt to the stored error label.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []

    coord_txt = []
    read_coord = 0
    read_mulliken = False
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord counts the dashed separator lines of
                        # the table; wrapping back to 0 ends the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            rows = [row.split() for row in coord_txt[2:]]
                            sp = [Element.from_Z(int(toks[1]))
                                  for toks in rows]
                            coords = [[float(x) for x in toks[3:6]]
                                      for toks in rows]
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [
                            float(_v) for _v in m.groups()[0].split()
                        ]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        n = int(len(values) / 3)
                        # The columns belong to the last n frequencies.
                        for i in range(0, len(values), 3):
                            j = -n + int(i / 3)
                            frequencies[j][1].extend(values[i:i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    # NOTE(review): block nesting was reconstructed;
                    # confirm `terminated` is only set on normal termination.
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran "D" exponents are not understood by float().
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no matcher(); use search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))
                                ]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
def _get_bond_type(graph) -> Dict:
    """Return a deep copy of ``graph`` with every bond entry replaced.

    Each bond ``k`` connects the atoms at ``graph["index1"][k]`` and
    ``graph["index2"][k]``.  Its entry in the copy's ``"bond"`` sequence is
    overwritten with the sum of the ``is_metal`` flags of the two elements
    involved.  The input ``graph`` itself is not modified.

    Args:
        graph: Mapping with the keys ``"atom"`` (atomic numbers),
            ``"index1"``, ``"index2"`` and ``"bond"``.

    Returns:
        The relabelled deep copy of ``graph``.
    """
    relabelled = deepcopy(graph)
    species = [Element.from_Z(atomic_number) for atomic_number in graph["atom"]]
    endpoints = zip(graph["index1"], graph["index2"])
    for bond_idx, (first, second) in enumerate(endpoints):
        metal_count = species[first].is_metal + species[second].is_metal
        relabelled["bond"][bond_idx] = metal_count
    return relabelled
def _parse(self, filename):
    """Parse a Gaussian output file and fill this object's result attributes.

    The file is scanned line by line; ``parse_stage`` tracks progress:

    * stage 0 -- collect link0 (``%key=value``) entries and the route line;
    * stage 1 -- wait for the charge / multiplicity line;
    * stage 2 -- collect thermal corrections, geometries, forces, orbital
      eigenvalues, MO coefficients, frequencies, energies, Mulliken
      charges, errors and the termination status.

    Args:
        filename: Path of the output file; it is opened with ``zopen``.

    Side effects:
        Always resets ``properly_terminated``, ``is_pcm``,
        ``stationary_type``, ``structures``, ``corrections``, ``energies``,
        ``pcm``, ``errors``, ``Mulliken_charges``, ``link0``,
        ``cart_forces``, ``frequencies``, ``eigenvalues`` and ``is_spin``.
        Depending on the file content it also sets ``functional``,
        ``basis_set``, ``route``, ``dieze_tag``, ``charge``, ``spin_mult``,
        ``num_basis_func``, ``electrons``, ``atom_basis_labels``,
        ``eigenvectors`` and ``molecular_orbital``.

    Warns:
        A warning (not an exception) is emitted when no
        "Normal termination" line was found.
    """
    # Raw strings keep the regex escapes (\s, \d, \( ...) away from Python's
    # own string-escape processing.
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    num_elec_patt = re.compile(r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(r"^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)")
    end_mulliken_patt = re.compile(r"(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)")
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)=\s+([\d\.-]+)")
    forces_on_patt = re.compile(r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")
    freq_on_patt = re.compile(r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")
    mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
    mo_coeff_name_patt = re.compile(r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

    # Reset everything a previous parse may have left behind.
    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []
    self.eigenvalues = []
    self.is_spin = False

    # Parser state.
    coord_txt = []
    read_coord = 0  # counts "--+" separator lines of an orientation table (mod 4)
    read_mulliken = False
    read_eigen = False
    eigen_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []
    read_mo = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:
                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        # NOTE: str.strip removes any of the characters
                        # ' ', 't', 'o' from both ends, not the literal
                        # substring " to "; kept as-is so keys are unchanged.
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            # Table complete: skip the two collected header
                            # lines, then read the atomic number (column 1)
                            # and the x, y, z values (columns 3-5).
                            sp = []
                            coords = []
                            for coord_line in coord_txt[2:]:
                                toks = coord_line.split()
                                sp.append(Element.from_Z(int(toks[1])))
                                coords.append([float(i) for i in toks[3:6]])
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend([float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                # read molecular orbital eigenvalues
                if read_eigen:
                    m = orbital_patt.search(line)
                    if m:
                        eigen_txt.append(line)
                    else:
                        read_eigen = False
                        self.eigenvalues = {Spin.up: []}
                        for eigenline in eigen_txt:
                            if "Alpha" in eigenline:
                                self.eigenvalues[Spin.up] += [
                                    float(e) for e in float_patt.findall(eigenline)
                                ]
                            elif "Beta" in eigenline:
                                if Spin.down not in self.eigenvalues:
                                    self.eigenvalues[Spin.down] = []
                                self.eigenvalues[Spin.down] += [
                                    float(e) for e in float_patt.findall(eigenline)
                                ]
                        eigen_txt = []

                # read molecular orbital coefficients
                if read_mo:
                    # build a matrix with all coefficients
                    all_spin = [Spin.up]
                    if self.is_spin:
                        all_spin.append(Spin.down)

                    mat_mo = {}
                    for spin in all_spin:
                        mat_mo[spin] = np.zeros((self.num_basis_func, self.num_basis_func))
                        nMO = 0
                        end_mo = False
                        while nMO < self.num_basis_func and not end_mo:
                            # skip the two lines preceding the coefficient rows
                            f.readline()
                            f.readline()
                            self.atom_basis_labels = []
                            for i in range(self.num_basis_func):
                                line = f.readline()

                                # identify atom and OA labels
                                m = mo_coeff_name_patt.search(line)
                                if m.group(1).strip() != "":
                                    iat = int(m.group(2)) - 1
                                    # atname = m.group(3)
                                    self.atom_basis_labels.append([m.group(4)])
                                else:
                                    self.atom_basis_labels[iat].append(m.group(4))

                                # MO coefficients
                                coeffs = [float(c) for c in float_patt.findall(line)]
                                for j, coeff in enumerate(coeffs):
                                    mat_mo[spin][i, nMO + j] = coeff

                            nMO += len(coeffs)
                            line = f.readline()
                            # manage pop=regular case (not all MO)
                            if nMO < self.num_basis_func and (
                                "Density Matrix:" in line or mo_coeff_patt.search(line)
                            ):
                                end_mo = True
                                warnings.warn("POP=regular case, matrix coefficients not complete")
                        f.readline()

                    self.eigenvectors = mat_mo
                    read_mo = False

                    # build a more convenient array dict with MO coefficient of
                    # each atom in each MO.
                    # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                    mo = {}
                    for spin in all_spin:
                        mo[spin] = [
                            [{} for iat in range(len(self.atom_basis_labels))]
                            for j in range(self.num_basis_func)
                        ]
                        for j in range(self.num_basis_func):
                            i = 0
                            for iat in range(len(self.atom_basis_labels)):
                                for label in self.atom_basis_labels[iat]:
                                    mo[spin][j][iat][label] = self.eigenvectors[spin][i, j]
                                    i += 1

                    self.molecular_orbital = mo

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [float(_v) for _v in m.groups()[0].split()]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        n = len(values) // 3
                        # Each group of three values is appended to one of
                        # the last n frequency entries (negative index).
                        for i in range(0, len(values), 3):
                            j = -n + i // 3
                            frequencies[j][1].extend(values[i : i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                        terminated = True
                elif error_patt.search(line):
                    error_defs = {
                        "! Non-Optimized Parameters !": "Optimization error",
                        "Convergence failure": "SCF convergence error",
                    }
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif num_elec_patt.search(line):
                    m = num_elec_patt.search(line)
                    self.electrons = (int(m.group(1)), int(m.group(2)))
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # The value may use a Fortran-style "D" exponent marker.
                    self.energies.append(float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no ``matcher`` method; the
                    # original call raised AttributeError on ONIOM lines.
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif not read_eigen and orbital_patt.search(line):
                    eigen_txt.append(line)
                    read_eigen = True
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True
                elif mo_coeff_patt.search(line):
                    if "Alpha" in line:
                        self.is_spin = True
                    read_mo = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2),
                                    float(m.group(3)),
                                ]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        # Only warn: an IOError used to be raised here but was disabled.
        warnings.warn("\n" + self.filename + ": Termination error or bad Gaussian output file !")