Example #1
0
File: ae.py Project: zhedian/RMG-Py
    def fit(self):
        """
        Fit atom energies by ordinary least squares.

        Loads the reference data, builds a matrix of per-species element
        counts, and regresses the sum of the reference atomization energy,
        the supplied electronic energy and the reference zero-point energy
        of every species in ``self.species_energies`` onto that matrix.

        The fitted coefficients are stored in ``self.atom_energies`` and the
        half-widths of their 95% confidence intervals are stored in
        ``self.confidence_intervals``; both are dictionaries keyed by
        element symbol.
        """
        self._load_refdata()

        labels = list(self.species_energies)

        def ref_entry(label):
            # Reference-data record of one species for the selected source
            return self.ref_data[label].reference_data[self.ref_data_src]

        molecules = [
            Molecule().from_adjacency_list(self.ref_data[label].adjacency_list,
                                           raise_atomtype_exception=False,
                                           raise_charge_exception=False)
            for label in labels
        ]
        atom_counts = [
            Counter(atom.element.symbol for atom in molecule.atoms)
            for molecule in molecules
        ]

        # Every element that occurs in any species, ordered by atomic number
        elements = sorted({symbol
                           for tally in atom_counts for symbol in tally},
                          key=lambda symbol: get_element(symbol).number)

        # Design matrix of element counts, Nmols x Nelements
        x = np.array([[tally[symbol] for symbol in elements]
                      for tally in atom_counts])

        # value_si is divided by E_h and then Na -- presumably converting
        # J/mol to Hartree per molecule; confirm against rmgpy.constants
        atomization_energies = np.array([
            ref_entry(label).atomization_energy.value_si
            / constants.E_h / constants.Na
            for label in labels
        ])
        zpes = np.array([
            ref_entry(label).zpe.value_si / constants.E_h / constants.Na
            for label in labels
        ])
        # Should already be in Hartree
        elec_energies = np.array(list(self.species_energies.values()))
        y = atomization_energies + elec_energies + zpes

        # Solve the normal equations (x^T x) w = x^T y
        xtx = x.T @ x
        w = np.linalg.solve(xtx, x.T @ y)
        self.atom_energies = dict(zip(elements, w))

        # Confidence intervals: parameter estimates are w +/- ci
        n = len(y)  # Ndata
        k = len(w)  # Nparam
        # NOTE(review): the residual degrees of freedom are taken as
        # n - k - 1 although x has no intercept column -- confirm this is
        # intended rather than n - k.
        dof = n - k - 1
        residuals = y - x @ w
        sigma2 = np.sum(residuals**2) / dof  # MSE
        cov = sigma2 * np.linalg.inv(xtx)  # covariance matrix
        se = np.sqrt(np.diag(cov))  # standard error
        alpha = 0.05  # 95% confidence level
        tdist = distributions.t.ppf(1 - alpha / 2, dof)  # student-t
        ci = tdist * se  # confidence interval half-width
        self.confidence_intervals = dict(zip(elements, ci))
Example #2
0
def geo_to_mol(coords: np.ndarray, symbols: Iterable[str] = None, nums: Iterable[int] = None) -> Molecule:
    """
    Build an RMG molecule from a molecular geometry.

    The atoms are described by ``coords`` together with either element
    symbols or atomic numbers; when both are supplied, ``symbols`` is used
    and ``nums`` is ignored. The geometry is written out as an XYZ string
    and parsed by Open Babel, which is used because it's better at
    recognizing long bonds.

    Raises:
        ValueError: If neither ``symbols`` nor ``nums`` is given.
    """
    if symbols is not None:
        symbols = list(symbols)
    elif nums is not None:
        symbols = [get_element(int(n)).symbol for n in nums]
    else:
        raise ValueError('Must specify nums or symbols')

    # XYZ layout: atom count, an empty comment line, then one line per atom.
    header = f'{len(symbols)}\n\n'
    # zip() pairs symbols with coordinate rows and stops at the shorter one.
    atom_lines = [
        f'{sym}  {pos[0]: .10f}  {pos[1]: .10f}  {pos[2]: .10f}'
        for sym, pos in zip(symbols, coords)
    ]
    pybel_mol = pybel.readstring('xyz', header + '\n'.join(atom_lines))
    return _pybel_to_rmg(pybel_mol)
Example #3
0
def _pybel_to_rmg(pybel_mol: pybel.Molecule) -> Molecule:
    """
    Translate a Pybel molecule into an RMG molecule.

    Only the elements, the atomic coordinates and the connectivity are
    carried over; charge, multiplicity, and bond orders are ignored.
    """
    rmg_mol = Molecule()

    # One RMG atom per Pybel atom, appended in Pybel's iteration order
    rmg_mol.vertices.extend(
        Atom(element=get_element(ob_atom.atomicnum),
             coords=np.array(ob_atom.coords))
        for ob_atom in pybel_mol
    )

    for ob_bond in pybel.ob.OBMolBondIter(pybel_mol.OBMol):
        # Open Babel indexes atoms starting at 1; shift to list positions
        first = ob_bond.GetBeginAtomIdx() - 1
        second = ob_bond.GetEndAtomIdx() - 1
        rmg_mol.add_bond(Bond(rmg_mol.vertices[first],
                              rmg_mol.vertices[second]))

    return rmg_mol