Ejemplo n.º 1
0
def get_qm7_energies():
    """
        Fetch the QM7 dataset and return the energies of its molecules.

        Returns
        -------
        energies: numpy array
            the entry stored under the 'energies' key of the fetched QM7
            dataset, i.e. the energies of the molecules
    """
    # Only the 'energies' entry of the fetched dataset is needed here.
    return fetch_qm7()['energies']
Ejemplo n.º 2
0
def get_qm7_positions_and_charges(sigma, overlapping_precision=1e-1):
    """
        Fetch the aligned QM7 molecules and return their rescaled positions
        together with their charges and valence charges.

        QM7 is a dataset of 7165 organic molecules with up to 7 non-hydrogen
        atoms, whose energies were computed with a quantum chemistry
        computational method named Density Functional Theory.
        This dataset has been made available to train machine learning models
        to predict these energies.

        Parameters
        ----------
        sigma : float
            width parameter of the Gaussian that represents a particle

        overlapping_precision : float, optional
            affects the scaling of the positions. The positions are rescaled
            such that two Gaussian functions of width sigma centered at the
            QM7 positions overlap with amplitude <= overlapping_precision

        Returns
        -------
        positions, charges, valence_charges: torch tensors
            tensors containing the rescaled positions, the charges (cast to
            float32) and the valence charges of the QM7 database molecules
    """
    dataset = fetch_qm7(align=True)
    raw_positions = dataset['positions']
    charges = dataset['charges'].astype('float32')
    valence_charges = get_valence(charges)

    # Find the smallest interatomic distance over the whole dataset.
    closest = np.inf
    for idx, molecule in enumerate(raw_positions):
        # Only the leading rows, as many as there are non-zero charges,
        # are treated as actual atoms of this molecule.
        n_atoms = np.count_nonzero(charges[idx])
        closest = min(closest, pdist(molecule[:n_atoms]).min())

    # Target spacing: the closest pair of atoms in the dataset ends up
    # exactly `delta` apart after rescaling.
    delta = sigma * np.sqrt(-8 * np.log(overlapping_precision))
    scaled_positions = raw_positions * delta / closest

    return (
        torch.from_numpy(scaled_positions),
        torch.from_numpy(charges),
        torch.from_numpy(valence_charges),
    )
Ejemplo n.º 3
0
# Finally, we import the utility functions that let us access the QM7 dataset
# and the cache directories to store our results.

from kymatio.datasets import fetch_qm7
from kymatio.caching import get_cache_dir

###############################################################################
# Data preparation
# ----------------
#
# Fetch the QM7 database and extract the atomic positions and nuclear charges
# of each molecule. This dataset contains 7165 organic molecules with up to
# seven non-hydrogen atoms, whose energies were computed using density
# functional theory.

# Load the dataset; `align=True` is forwarded to fetch_qm7.
# NOTE(review): the exact effect of `align` is defined in kymatio.datasets and
# is not visible here -- confirm before relying on a particular orientation.
qm7 = fetch_qm7(align=True)
# Atomic positions and nuclear charges, read from the fetched dataset.
pos = qm7['positions']
full_charges = qm7['charges']

# Size of the first axis of the positions array
# (assumed to be one entry per molecule -- TODO confirm).
n_molecules = pos.shape[0]

###############################################################################
# From the nuclear charges, we compute the number of valence electrons, which
# we store as the valence charge of that atom.

# Entries with nuclear charge <= 2: the valence charge equals the full charge.
# Multiplying by the boolean mask zeroes every other entry.
mask = full_charges <= 2
valence_charges = full_charges * mask

# Entries with nuclear charge in (2, 10]: the valence charge is the nuclear
# charge minus 2. The masked term is zero everywhere else, so the in-place
# add leaves the entries set above unchanged.
# NOTE(review): charges above 10 are not handled by the lines visible here;
# confirm whether later code covers them.
mask = np.logical_and(full_charges > 2, full_charges <= 10)
valence_charges += (full_charges - 2) * mask