def fit(self):
    """
    Fit atom energies by ordinary least squares.

    For every label in `self.species_energies`, the atom counts of the
    reference molecule form one row of the design matrix, and the target
    is the sum of the reference atomization energy, the supplied
    electronic energy, and the reference zero-point energy. The fitted
    coefficients are stored per element in `self.atom_energies`, and the
    95% confidence-interval half-widths in `self.confidence_intervals`.
    """
    self._load_refdata()

    labels = list(self.species_energies)

    def _ref_entry(label):
        # Reference-data record of this species for the selected source.
        return self.ref_data[label].reference_data[self.ref_data_src]

    # Count atoms per element symbol in each reference molecule.
    atom_counts = []
    molecules = [
        Molecule().from_adjacency_list(
            self.ref_data[label].adjacency_list,
            raise_atomtype_exception=False,
            raise_charge_exception=False,
        )
        for label in labels
    ]
    for molecule in molecules:
        atom_counts.append(Counter(atom.element.symbol for atom in molecule.atoms))

    # All element symbols that occur anywhere, ordered by atomic number.
    symbols_seen = set()
    for counts in atom_counts:
        symbols_seen.update(counts)
    elements = sorted(symbols_seen, key=lambda sym: get_element(sym).number)

    # Design matrix: Nmols x Nelements
    x = np.array([[counts[sym] for sym in elements] for counts in atom_counts])

    # Reference quantities: SI value divided by E_h and then by Na
    atomization_energies = np.array([
        _ref_entry(label).atomization_energy.value_si / constants.E_h / constants.Na
        for label in labels
    ])
    zpes = np.array([
        _ref_entry(label).zpe.value_si / constants.E_h / constants.Na
        for label in labels
    ])
    # Should already be in Hartree
    elec_energies = np.array(list(self.species_energies.values()))

    y = atomization_energies + elec_energies + zpes

    # Solve the normal equations (x^T x) w = x^T y
    gram = x.T @ x
    w = np.linalg.solve(gram, x.T @ y)
    self.atom_energies = dict(zip(elements, w))

    # Confidence intervals
    n_data = len(y)
    n_param = len(w)
    # NOTE(review): x has no intercept column, so the usual residual
    # degrees of freedom would be n_data - n_param; the extra -1 is kept
    # unchanged here -- confirm whether it is intentional.
    dof = n_data - n_param - 1
    residuals = y - x @ w
    mse = np.sum(residuals**2) / dof
    covariance = mse * np.linalg.inv(gram)
    std_err = np.sqrt(np.diag(covariance))
    alpha = 0.05  # 95% confidence level
    t_value = distributions.t.ppf(1 - alpha / 2, dof)  # student-t
    half_widths = t_value * std_err
    # Parameter estimates are w +/- half-width
    self.confidence_intervals = dict(zip(elements, half_widths))
def geo_to_mol(coords: np.ndarray,
               symbols: Iterable[str] = None,
               nums: Iterable[int] = None) -> Molecule:
    """
    Convert molecular geometry specified by atomic coordinates and
    atomic symbols/numbers to RMG molecule.

    Use Open Babel because it's better at recognizing long bonds.

    Args:
        coords: Atomic coordinates, one (x, y, z) row per atom.
        symbols: Element symbols, one per atom. Used in preference to
            `nums` when both are given.
        nums: Atomic numbers, one per atom. Only consulted when
            `symbols` is None.

    Returns:
        The molecule produced by `_pybel_to_rmg` from the Open Babel
        perception of the geometry.

    Raises:
        ValueError: If neither `nums` nor `symbols` is given, or if the
            number of coordinate rows differs from the number of atoms.
    """
    if nums is None and symbols is None:
        raise ValueError('Must specify nums or symbols')

    if symbols is None:
        symbols = [get_element(int(n)).symbol for n in nums]
    else:
        symbols = list(symbols)

    # zip() below would silently truncate to the shorter input while the
    # XYZ header still announces len(symbols) atoms, so reject mismatches
    # explicitly instead of handing Open Babel an inconsistent block.
    if len(coords) != len(symbols):
        raise ValueError(
            f'Number of coordinates ({len(coords)}) does not match '
            f'number of atoms ({len(symbols)})'
        )

    # XYZ format: atom count, (empty) comment line, then one line per atom.
    atom_lines = '\n'.join(
        f'{s} {c[0]: .10f} {c[1]: .10f} {c[2]: .10f}'
        for s, c in zip(symbols, coords)
    )
    xyz = f'{len(symbols)}\n\n' + atom_lines

    mol = pybel.readstring('xyz', xyz)
    return _pybel_to_rmg(mol)
def _pybel_to_rmg(pybel_mol: pybel.Molecule) -> Molecule:
    """
    Build an RMG molecule from a Pybel molecule, carrying over only the
    elements, coordinates, and connectivity. Charge, multiplicity, and
    bond orders of the Pybel molecule are ignored.
    """
    rmg_mol = Molecule()

    # One RMG atom per Pybel atom, in the same order, so that Open Babel
    # atom indices can be mapped onto positions in the vertex list.
    rmg_mol.vertices.extend(
        Atom(element=get_element(ob_atom.atomicnum),
             coords=np.array(ob_atom.coords))
        for ob_atom in pybel_mol
    )

    atoms = rmg_mol.vertices
    for ob_bond in pybel.ob.OBMolBondIter(pybel_mol.OBMol):
        # Open Babel indexes atoms starting at 1
        first = atoms[ob_bond.GetBeginAtomIdx() - 1]
        second = atoms[ob_bond.GetEndAtomIdx() - 1]
        rmg_mol.add_bond(Bond(first, second))

    return rmg_mol