def get_qm7_energies():
    """
    Fetch the QM7 dataset and return the energies of its molecules.

    Returns
    -------
    energies: numpy array
        array containing the energies of the molecules
    """
    # The dataset is returned as a mapping; only the 'energies' entry is used.
    return fetch_qm7()['energies']
def get_qm7_positions_and_charges(sigma, overlapping_precision=1e-1):
    """
    Fetch the QM7 molecules and return rescaled positions with their charges.

    QM7 is a dataset of 7165 organic molecules with up to 7 non-hydrogen
    atoms, whose energies were computed with a quantum chemistry
    computational method named Density Functional Theory.
    This dataset has been made available to train machine learning models
    to predict these energies.

    Parameters
    ----------
    sigma : float
        width parameter of the Gaussian that represents a particle
    overlapping_precision : float, optional
        affects the scaling of the positions. The positions are re-scaled
        such that two Gaussian functions of width sigma centered at the
        qm7 positions overlap with amplitude <= the overlapping_precision

    Returns
    -------
    positions, charges, valence_charges: torch tensors
        tensors containing the rescaled positions, the charges (cast to
        float32) and the valence charges of the QM7 database molecules
    """
    dataset = fetch_qm7(align=True)
    charges = dataset['charges'].astype('float32')
    valence_charges = get_valence(charges)
    raw_positions = dataset['positions']

    # Find the smallest pairwise distance between atoms of a same molecule,
    # taken over the whole dataset. Only the leading entries with a non-zero
    # charge are treated as atoms of a molecule.
    smallest_gap = np.inf
    for idx, molecule in enumerate(raw_positions):
        n_atoms = np.count_nonzero(charges[idx])
        atom_coords = molecule[:n_atoms, :]
        smallest_gap = min(smallest_gap, pdist(atom_coords).min())

    # delta is the distance the closest pair of atoms is mapped to once every
    # position has been multiplied by the same global factor below.
    delta = sigma * np.sqrt(-8 * np.log(overlapping_precision))
    scaled_positions = raw_positions * delta / smallest_gap

    return tuple(
        torch.from_numpy(array)
        for array in (scaled_positions, charges, valence_charges)
    )
# Finally, we import the utility functions that let us access the QM7 dataset # and the cache directories to store our results. from kymatio.datasets import fetch_qm7 from kymatio.caching import get_cache_dir ############################################################################### # Data preparation # ---------------- # # Fetch the QM7 database and extract the atomic positions and nuclear charges # of each molecule. This dataset contains 7165 organic molecules with up to # seven non-hydrogen atoms, whose energies were computed using density # functional theory. qm7 = fetch_qm7(align=True) pos = qm7['positions'] full_charges = qm7['charges'] n_molecules = pos.shape[0] ############################################################################### # From the nuclear charges, we compute the number of valence electrons, which # we store as the valence charge of that atom. mask = full_charges <= 2 valence_charges = full_charges * mask mask = np.logical_and(full_charges > 2, full_charges <= 10) valence_charges += (full_charges - 2) * mask