Exemple #1
0
    def test_positions(self):
        """Check that the descriptor width matches the reported feature count
        for every accepted way of passing ``positions``.
        """
        # All four periodic/crossover combinations are exercised. The order
        # matters: the descriptor left over from the last combination is
        # reused by the invalid-input check after the loop.
        setups = (
            (False, True),
            (True, True),
            (True, False),
            (False, False),
        )
        for is_periodic, with_crossover in setups:
            desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=is_periodic, crossover=with_crossover)

            # A nested coordinate triple, a single integer, and no positions
            # argument at all must each produce rows of the reported width.
            by_coordinates = desc.create(H2O, positions=[[0, 0, 0]])
            self.assertEqual(desc.get_number_of_features(), by_coordinates.shape[1])

            by_integer = desc.create(H2O, positions=[0])
            self.assertEqual(desc.get_number_of_features(), by_integer.shape[1])

            by_default = desc.create(H2O)
            self.assertEqual(desc.get_number_of_features(), by_default.shape[1])

        # A non-numeric position must be rejected with a ValueError.
        with self.assertRaises(ValueError):
            desc.create(H2O, positions=['a'])
Exemple #2
0
def createDescriptorsAllSOAP(data,
                             species,
                             sigma_SOAP,
                             cutoff_SOAP,
                             nmax_SOAP,
                             lmax_SOAP,
                             periodic,
                             sparse_SOAP=default_sparse_SOAP):
    """Compute SOAP descriptors for every entry of ``data``, grouped per atom.

    Args:
        data: Indexable collection of structures; ``data[0]`` determines the
            number of atoms and ``len(data)`` the number of steps.
        species: Forwarded to ``SOAP`` as ``species``.
        sigma_SOAP: Forwarded to ``SOAP`` as ``sigma``.
        cutoff_SOAP: Forwarded to ``SOAP`` as ``rcut``.
        nmax_SOAP: Forwarded to ``SOAP`` as ``nmax``.
        lmax_SOAP: Forwarded to ``SOAP`` as ``lmax``.
        periodic: Forwarded to ``SOAP`` as ``periodic``.
        sparse_SOAP: Forwarded to ``SOAP`` as ``sparse``.

    Returns:
        list: One element per atom, each the ``[atom, :, :]`` slice
        (steps x features) of the object-dtype descriptor array.
    """
    # Build the descriptor generator from the forwarded settings.
    builder = SOAP(
        species=species,
        sigma=sigma_SOAP,
        periodic=periodic,
        rcut=cutoff_SOAP,
        nmax=nmax_SOAP,
        lmax=lmax_SOAP,
        sparse=sparse_SOAP,
    )

    # Dimensions of the (atom, step, feature) result array.
    n_features = builder.get_number_of_features()
    n_atoms = np.shape(data[0])[0]
    n_steps = len(data)
    per_atom_step = np.empty((n_atoms, n_steps, n_features), dtype=object)

    # Fill one step at a time; each call yields the rows for all atoms.
    for step in tqdm.tqdm(range(n_steps)):
        per_atom_step[:, step, :] = builder.create(data[step])

    # Regroup so that callers receive one (steps x features) slice per atom.
    return [per_atom_step[atom, :, :] for atom in range(n_atoms)]
Exemple #3
0
    def test_number_of_features(self):
        """Check the reported feature count against the closed-form value and
        against the width of an actual output.
        """
        lmax = 5
        nmax = 5
        n_elems = 2
        desc = SOAP(species=[1, 8], rcut=3, nmax=nmax, lmax=lmax, periodic=True)

        # Closed form: (lmax + 1) * m * (m + 1) / 2 with m = nmax * n_elems.
        # m * (m + 1) is always even, so integer division is exact.
        n_basis = nmax * n_elems
        expected = (lmax + 1) * n_basis * (n_basis + 1) // 2
        reported = desc.get_number_of_features()
        self.assertEqual(reported, expected)

        # The width of a created descriptor must match the reported count.
        reported = desc.get_number_of_features()
        created = desc.create(H2O)
        self.assertEqual(reported, created.shape[1])
Exemple #4
0
 def test_properties(self):
     """Check that reassigning ``species`` through the property changes both
     the reported feature count and the width of the created output.
     """
     desc = SOAP(species=[1, 8], rcut=3, nmax=3, lmax=3, sparse=False)

     # Snapshot with the initial two-species setup.
     features_before = desc.get_number_of_features()
     output_before = desc.create(H2O)

     # Switch to a three-species setup and take the same measurements.
     desc.species = ["C", "H", "O"]
     features_after = desc.get_number_of_features()
     output_after = desc.create(molecule("CH3OH"))

     self.assertNotEqual(features_before, features_after)
     self.assertNotEqual(output_before.shape[1], output_after.shape[1])
Exemple #5
0
    def test_positions(self):
        """Check the output shape for each accepted form of ``positions``
        across all periodic/crossover combinations.
        """
        # The order matters: the descriptor left over from the last
        # combination is reused by the invalid-input check after the loop.
        setups = (
            (False, True),
            (True, True),
            (True, False),
            (False, False),
        )
        for is_periodic, with_crossover in setups:
            desc = SOAP(
                species=[1, 6, 8],
                rcut=10.0,
                nmax=2,
                lmax=0,
                periodic=is_periodic,
                crossover=with_crossover,
            )
            n_feat = desc.get_number_of_features()
            one_row = (1, n_feat)
            three_rows = (3, n_feat)

            # One coordinate triple, given as an array and as a nested list.
            self.assertEqual(one_row, desc.create(H2O, positions=np.array([[0, 0, 0]])).shape)
            self.assertEqual(one_row, desc.create(H2O, positions=[[0, 0, 0]]).shape)

            # Three integers, given as a list and as an array.
            self.assertEqual(three_rows, desc.create(H2O, positions=[0, 1, 2]).shape)
            self.assertEqual(three_rows, desc.create(H2O, positions=np.array([0, 1, 2])).shape)

            # No positions argument at all.
            self.assertEqual(three_rows, desc.create(H2O).shape)

        # A non-numeric position must be rejected with a ValueError.
        with self.assertRaises(ValueError):
            desc.create(H2O, positions=['a'])
Exemple #6
0
    def backend(sn, soap_mask, tracer_atomic_number, environment_list):
        """Build a SOAP callable configured from the enclosing ``soap_opts``.

        Args:
            sn: Unused here; kept for the backend call signature.
            soap_mask: Unused here; kept for the backend call signature.
            tracer_atomic_number: Unused here; kept for the backend call
                signature.
            environment_list: Forwarded to ``SOAP`` as ``species``.

        Returns:
            A function ``dscribe_soap(structure, positions)`` returning the
            dense SOAP output as a float array. The function carries an
            ``n_dim`` attribute holding the number of features.
        """
        soap = SOAP(
            species=environment_list,
            crossover=soap_opts['crossover'],
            rcut=soap_opts['cutoff'],
            nmax=soap_opts['n_max'],
            lmax=soap_opts['l_max'],
            rbf=soap_opts['rbf'],
            sigma=soap_opts['atom_sigma'],
            periodic=soap_opts['periodic'],
            sparse=False,
        )

        def dscribe_soap(structure, positions):
            # ``np.float`` was only an alias of the builtin ``float`` and has
            # been removed from NumPy (deprecated in 1.20, gone in 1.24), so
            # the builtin is used directly; the resulting dtype is the same.
            return soap.create(structure, positions=positions).astype(float)

        # Expose the descriptor width on the callable itself.
        dscribe_soap.n_dim = soap.get_number_of_features()

        return dscribe_soap
Exemple #7
0
class SOAPComputer(torch.nn.Module):
    """Torch module that evaluates a ``SOAP`` descriptor frame by frame.

    All keyword arguments given to the constructor are forwarded unchanged
    to ``SOAP``.
    """

    def __init__(self, **config):
        super().__init__()
        self.soap = SOAP(**config)

    def __len__(self):
        """Return the number of features reported by the wrapped descriptor."""
        return self.soap.get_number_of_features()

    def forward(self, species_coordinates):
        """Compute descriptors for one or several coordinate frames.

        Args:
            species_coordinates: Pair ``(species, coordinates)``. A
                two-dimensional ``coordinates`` is treated as a single frame
                and given a leading axis of length one.

        Returns:
            torch.Tensor: float64 tensor built from the per-frame outputs.
        """
        species, coordinates = species_coordinates

        # Promote a single frame to a batch of one.
        single_frame = len(coordinates.shape) == 2
        if single_frame:
            coordinates = coordinates[None, :]

        # One Atoms object is created from the first frame and then reused,
        # with only its positions updated for each subsequent frame.
        mol = Atoms(species, coordinates[0])

        def describe(frame):
            mol.set_positions(frame)
            return self.soap.create(mol)

        per_frame = [describe(frame) for frame in coordinates]
        return torch.tensor(per_frame, dtype=torch.float64)
Exemple #8
0
    def test_parallel_sparse(self):
        """Check that sparse output created for several systems at once
        matches the output assembled from single-center calls.
        """
        samples = [molecule("CO"), molecule("N2O")]
        desc = SOAP(
            species=[6, 7, 8],
            rcut=5,
            nmax=3,
            lmax=3,
            sigma=1,
            periodic=False,
            crossover=True,
            average=False,
            sparse=True,
        )
        n_features = desc.get_number_of_features()

        def reference(centers):
            """Stack single-center outputs, one row per (sample, position)."""
            rows = np.empty((len(centers), n_features))
            for row, (i_sample, position) in enumerate(centers):
                rows[row, :] = desc.create(samples[i_sample], [position]).toarray()
            return rows

        # Integer positions, serial job.
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=1,
        ).toarray()
        expected = reference([(0, 0), (1, 0), (1, 1)])
        self.assertTrue(np.allclose(output, expected))

        # Integer positions, two jobs.
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=2,
        ).toarray()
        expected = reference([(0, 0), (1, 0), (1, 1)])
        self.assertTrue(np.allclose(output, expected))

        # No positions specified: compared against indices 0-1 of the first
        # sample and 0-2 of the second.
        output = desc.create(
            system=samples,
            positions=[None, None],
            n_jobs=2,
        ).toarray()
        expected = reference([(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)])
        self.assertTrue(np.allclose(output, expected))

        # Cartesian positions.
        output = desc.create(
            system=samples,
            positions=[[[0, 0, 0], [1, 2, 0]], [[1, 2, 0]]],
            n_jobs=2,
        ).toarray()
        expected = reference([(0, [0, 0, 0]), (0, [1, 2, 0]), (1, [1, 2, 0])])
        self.assertTrue(np.allclose(output, expected))

        # Averaged output: one row per system, the mean over its centers.
        desc._average = True
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=2,
        ).toarray()
        expected = np.empty((2, n_features))
        expected[0, :] = desc.create(samples[0], [0]).toarray()
        second_a = desc.create(samples[1], [0]).toarray()
        second_b = desc.create(samples[1], [1]).toarray()
        expected[1, :] = 1 / 2 * (second_a + second_b)
        self.assertTrue(np.allclose(output, expected))
Exemple #9
0
def _transform_particle_groups(descriptors, transformers, n_out):
    """Apply one fitted transformer per particle group.

    ``transformers[0]`` is applied to the first two particles and
    ``transformers[1]`` to all remaining ones. Each group is flattened to
    ``(frames * particles_in_group, features)`` for ``transform`` and then
    reshaped back. A new array is returned; the input is left untouched.
    """
    n_frames, n_particles, n_in = descriptors.shape
    transformed = np.empty((n_frames, n_particles, n_out))
    for index, group in enumerate((slice(0, 2), slice(2, None))):
        block = descriptors[:, group, :]
        n_group = block.shape[1]
        if n_group == 0:
            # Nothing to transform for this group.
            continue
        flat = block.reshape(n_frames * n_group, n_in)
        transformed[:, group, :] = transformers[index].transform(flat).reshape(
            n_frames, n_group, n_out)
    return transformed


def create_data_SOAP(data, metadata):
    """Compute SOAP descriptors for every frame in ``data`` and attach them.

    The descriptors are optionally reduced with per-group PCAs and
    optionally scaled, driven by the entries of ``metadata``.

    Args:
        data: Table with a ``'molec'`` column holding one structure per
            integer index from 0 to ``data.index.max()``. An ``'is_train'``
            column is added in place when missing.
        metadata: Mapping that must provide ``'particles'``, ``'scaler'``,
            ``'test_size'``, ``'rcut'``, ``'nmax'``, ``'lmax'``, ``'N_PCA'``
            and ``'sigma_SOAP'``. ``'PCAs'`` is optional. The mapping is
            updated in place (``'scaler'``, and ``'PCAs'`` /
            ``'old_N_feature'`` when PCAs are newly selected).

    Returns:
        tuple: ``data`` joined with a ``'descriptor'`` column (one entry per
        frame), and the updated ``metadata``.

    Raises:
        KeyError: If a required ``metadata`` entry is missing.
    """
    # ``test_size`` is not used below, but it is still looked up so that a
    # missing key keeps raising KeyError as before.
    particles, scaler, test_size, rcut, nmax, lmax, N_PCA, sigma_SOAP = [
        metadata[x] for x in [
            'particles', 'scaler', 'test_size', 'rcut', 'nmax', 'lmax',
            'N_PCA', 'sigma_SOAP'
        ]
    ]

    soap = SOAP(
        species=np.unique(particles),
        sigma=sigma_SOAP,
        periodic=False,
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
        sparse=False,
    )
    nb_features = soap.get_number_of_features()

    n_frames = data.index.max() + 1
    n_particles = len(particles)

    # ``pd.np`` was removed from pandas (deprecated in 1.0, gone in 2.0);
    # NumPy is used directly, as elsewhere in this function.
    descriptors = np.empty((n_frames, n_particles, nb_features))
    for i_time in tqdm.tqdm(range(n_frames)):
        descriptors[i_time] = soap.create(data['molec'][i_time],
                                          positions=np.arange(n_particles))

    # Create the train/test flag if the caller has not supplied one.
    if 'is_train' not in data:
        data['is_train'] = create_is_train(n_frames)

    # Optional dimensionality reduction, one PCA per particle group.
    if N_PCA:
        reuse_existing = 'PCAs' in metadata
        if reuse_existing:
            PCAs = metadata['PCAs']
        else:
            # Select the PCAs on the training frames only.
            PCAs = select_best_params(descriptors[data['is_train'].values],
                                      nb_features, N_PCA)
        descriptors = _transform_particle_groups(descriptors, PCAs, N_PCA)
        if not reuse_existing:
            # Recorded only after a successful transform, as before.
            metadata['old_N_feature'] = nb_features
            metadata['PCAs'] = PCAs
        nb_features = N_PCA

    # Scaling: False disables it, None fits a new scaler, and anything else
    # is treated as a pair of already fitted scalers.
    if scaler == False:  # noqa: E712 - equality kept so falsy flags such as 0 still disable scaling
        pass
    elif scaler is None:
        descriptors, scaler = scale_descriptors(data, descriptors)
    else:
        descriptors = _transform_particle_groups(descriptors, scaler,
                                                 nb_features)

    metadata['scaler'] = scaler
    return data.join(pd.DataFrame({'descriptor': list(descriptors)})), metadata
Exemple #10
0
# Create SOAP output for the system, passing a single integer position.
# NOTE: `soap` and `water` are expected to be defined earlier in the script.
soap_water = soap.create(water, positions=[0])

# Show the descriptor values and the shape of the output.
print(soap_water)
print(soap_water.shape)

# Create output for multiple systems: one list of positions per sample.
samples = [molecule("H2O"), molecule("NO2"), molecule("CO2")]
positions = [[0], [1, 2], [1, 2]]
# NOTE(review): despite the variable name, these hold SOAP output, not
# Coulomb matrices. The second call overwrites the result of the first.
coulomb_matrices = soap.create(samples, positions)  # Serial
coulomb_matrices = soap.create(samples, positions, n_jobs=2)  # Parallel

# Let's change the SOAP setup and see how the number of features changes.
# `species` and `rcut` are expected to be defined earlier in the script.
small_soap = SOAP(species=species, rcut=rcut, nmax=2, lmax=0)
big_soap = SOAP(species=species, rcut=rcut, nmax=9, lmax=9)
n_feat1 = small_soap.get_number_of_features()
n_feat2 = big_soap.get_number_of_features()
print(n_feat1, n_feat2)

# Periodic systems
from ase.build import bulk

# Build a copper cell with ASE and print its periodic-boundary flags.
copper = bulk('Cu', 'fcc', a=3.6, cubic=True)
print(copper.get_pbc())
# NOTE(review): `lmax` is set from `nmax` here -- confirm this is intended.
periodic_soap = SOAP(species=[29],
                     rcut=rcut,
                     nmax=nmax,
                     lmax=nmax,
                     periodic=True,
                     sparse=False)
Exemple #11
0
    def test_parallel_sparse(self):
        """Check that sparse output created for several systems at once
        matches the output assembled from single-center calls.
        """
        samples = [molecule("CO"), molecule("NO")]
        desc = SOAP(
            species=[6, 7, 8],
            rcut=5,
            nmax=3,
            lmax=3,
            sigma=1,
            periodic=False,
            crossover=True,
            average="off",
            sparse=True,
        )
        n_features = desc.get_number_of_features()

        def single(i_sample, center):
            """Dense output for one center of one sample."""
            return desc.create(samples[i_sample], [center]).todense()

        def both_atoms_reference():
            """Reference for indices 0 and 1 of both samples."""
            ref = np.empty((2, 2, n_features))
            for i_sample in range(2):
                for i_atom in range(2):
                    ref[i_sample, i_atom] = single(i_sample, i_atom)
            return ref

        # Fixed-size outputs: integer positions with a serial job, integer
        # positions with two jobs, and no positions with two jobs.
        fixed_size_cases = (
            ([[0, 1], [0, 1]], 1),
            ([[0, 1], [0, 1]], 2),
            ([None, None], 2),
        )
        for centers, jobs in fixed_size_cases:
            output = desc.create(
                system=samples,
                positions=centers,
                n_jobs=jobs,
            ).todense()
            assumed = both_atoms_reference()
            self.assertTrue(np.allclose(output, assumed))

        # Multiple systems, parallel job, cartesian positions, fixed size.
        output = desc.create(
            system=samples,
            positions=[[[0, 0, 0]], [[1, 2, 0]]],
            n_jobs=2,
        ).todense()
        assumed = np.empty((2, 1, n_features))
        assumed[0, 0] = single(0, [0, 0, 0])
        assumed[1, 0] = single(1, [1, 2, 0])
        self.assertTrue(np.allclose(output, assumed))

        # Multiple systems, parallel job, indices, variable size: the result
        # is indexed per system and then per center.
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=2,
        )
        for i_sample, i_atom in ((0, 0), (1, 0), (1, 1)):
            self.assertTrue(
                np.allclose(output[i_sample][i_atom].todense(), single(i_sample, i_atom))
            )

        # Averaged output: one row per system, the mean over its centers.
        desc.average = "outer"
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=2,
        ).todense()
        assumed = np.empty((2, n_features))
        assumed[0] = single(0, 0)
        assumed[1] = 1 / 2 * (single(1, 0) + single(1, 1))
        self.assertTrue(np.allclose(output, assumed))
Exemple #12
0
class SOAP(BaseFeaturizer):
    r"""
    Smooth overlap of atomic positions (interface via DScribe).

    Class for generating a partial power spectrum from Smooth Overlap of Atomic
    Orbitals (SOAP). This implementation uses real (tesseral) spherical
    harmonics as the angular basis set and provides two orthonormalized
    alternatives for the radial basis functions: spherical primitive gaussian
    type orbitals ("gto") or the polynomial basis set ("polynomial"). By
    default the faster gto-basis is used. Please see the DScribe SOAP
    documentation for more details.

    Note that SOAP is only featurized for elements identified by "fit" (see
    following), thus "fit" must be called before "featurize", or else an error
    will be raised.

    Based originally on the following publications:

    "On representing chemical environments, Albert P. Bartók, Risi
        Kondor, and Gábor Csányi, Phys. Rev. B 87, 184115, (2013),
        https://doi.org/10.1103/PhysRevB.87.184115

    "Comparing molecules and solids across structural and alchemical
        space", Sandip De, Albert P. Bartók, Gábor Csányi and Michele Ceriotti,
        Phys.  Chem. Chem. Phys. 18, 13754 (2016),
        https://doi.org/10.1039/c6cp00415f

    Implementation (and some documentation) originally based on DScribe:
    https://github.com/SINGROUP/dscribe.

    "DScribe: Library of descriptors for machine learning in materials science",
        Himanen, L., J{\"a}ger, M. O.J., Morooka, E. V., Federici
        Canova, F., Ranawat, Y. S., Gao, D. Z., Rinke, P. and Foster, A. S.
        Computer Physics Communications, 106949 (2019),
        https://doi.org/10.1016/j.cpc.2019.106949

    Args:
        rcut (float): A cutoff for local region in angstroms. Should be
            bigger than 1 angstrom.
        nmax (int): The number of radial basis functions.
        lmax (int): The maximum degree of spherical harmonics.
        sigma (float): The standard deviation of the gaussians used to expand the
            atomic density.
        rbf (str): The radial basis functions to use. The available options are:

            * "gto": Spherical gaussian type orbitals defined as :math:`g_{nl}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'l} r^l e^{-\alpha_{n'l}r^2}`
            * "polynomial": Polynomial basis defined as :math:`g_{n}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'} (r-r_\mathrm{cut})^{n'+2}`

        periodic (bool): Determines whether the system is considered to be
            periodic.
        crossover (bool): Determines if crossover of atomic types should
            be included in the power spectrum. If enabled, the power
            spectrum is calculated over all unique species combinations Z
            and Z'. If disabled, the power spectrum does not contain
            cross-species information and is only run over each unique
            species Z. Turned on by default to correspond to the original
            definition
    """
    @requires(
        dscribe,
        "SOAPFeaturizer requires DScribe. Install from github.com/SINGROUP/dscribe"
    )
    def __init__(
        self,
        rcut,
        nmax,
        lmax,
        sigma,
        periodic,
        rbf="gto",
        crossover=True,
    ):
        self.rcut = rcut
        self.nmax = nmax
        self.lmax = lmax
        self.sigma = sigma
        self.rbf = rbf
        self.periodic = periodic
        self.crossover = crossover
        self.adaptor = AseAtomsAdaptor()
        # The attributes below are populated by fit().
        self.length = None
        self.atomic_numbers = None
        self.elements_sorted = None
        self.soap = None
        self.n_elements = None

    def _check_fitted(self):
        """Raise NotFittedError unless fit() has created the descriptor."""
        if self.soap is None:
            raise NotFittedError("Please fit SOAP before featurizing.")

    def fit(self, X, y=None):
        """
        Fit the SOAP featurizer to a dataframe.

        Collects the atomic numbers present in the given structures and
        builds the underlying DScribe descriptor for exactly those species.

        Args:
            X ([SiteCollection]): For example, a list of pymatgen Structures.
            y : unused (added for consistency with overridden method signature)

        Returns:
            self

        Raises:
            TypeError: If any item of X is not a pymatgen Structure.
        """
        # Check that pymatgen.Structures are provided
        if not all(isinstance(struct, Structure) for struct in X):
            raise TypeError(
                "This fit requires an array-like input of Pymatgen "
                "Structures and sites!")

        # Unique atomic numbers over all structures.
        elements = {e.Z for s in X for e in s.composition.elements}
        self.elements_sorted = sorted(elements)

        self.atomic_numbers = elements
        self.soap = SOAP_dscribe(species=self.atomic_numbers,
                                 rcut=self.rcut,
                                 nmax=self.nmax,
                                 lmax=self.lmax,
                                 sigma=self.sigma,
                                 rbf=self.rbf,
                                 periodic=self.periodic,
                                 crossover=self.crossover,
                                 average=False,
                                 sparse=False)

        self.length = self.soap.get_number_of_features()
        return self

    def featurize(self, struct, idx):
        """
        Compute the SOAP vector for one site of a structure.

        Args:
            struct: Structure, converted to ASE atoms through the adaptor.
            idx: Site index, passed to DScribe as the single position.

        Returns:
            list: The first (and only requested) row of the output.

        Raises:
            NotFittedError: If fit() has not been called.
        """
        self._check_fitted()
        s_ase = self.adaptor.get_atoms(struct)
        return self.soap.create(s_ase, positions=[idx]).tolist()[0]

    def feature_labels(self):
        """
        Build labels of the form "Z=..,Z'=..,l=..,n=..,n'=..".

        Labels are generated for all fitted species pairs with Z' >= Z, all
        l in [0, lmax] and all radial pairs with n' >= n.

        NOTE(review): the labels do not depend on ``crossover``; confirm that
        they match the DScribe output when crossover is disabled.

        Raises:
            NotFittedError: If fit() has not been called.
        """
        self._check_fitted()
        labels = []
        for zi in self.elements_sorted:
            for zj in self.elements_sorted:
                if zj < zi:
                    # Each unordered species pair is listed once.
                    continue
                for l in range(self.lmax + 1):
                    for ni in range(self.nmax):
                        for nj in range(ni, self.nmax):
                            labels.append(
                                "Z={},Z'={},l={},n={},n'={}".format(
                                    zi, zj, l, ni, nj))

        return labels

    def citations(self):
        """Return BibTeX entries for the SOAP papers and for DScribe."""
        # Backslashes are doubled so that the BibTeX accent commands
        # (\' and \") keep their backslash in the resulting strings.
        return [
            "@article{PhysRevB.87.184115,"
            "title = {On representing chemical environments},"
            "author = {Bart\\'ok, Albert P. and Kondor, Risi and Cs\\'anyi, "
            "G\\'abor},"
            "journal = {Phys. Rev. B},"
            "volume = {87},"
            "issue = {18},"
            "pages = {184115},"
            "numpages = {16},"
            "year = {2013},"
            "month = {May},"
            "publisher = {American Physical Society},"
            "doi = {10.1103/PhysRevB.87.184115},"
            "url = {https://link.aps.org/doi/10.1103/PhysRevB.87.184115}}",
            "@Article{C6CP00415F,"
            "author ={De, Sandip and Bartók, Albert P. and Csányi, Gábor"
            " and Ceriotti, Michele},"
            "title  ={Comparing molecules and solids across structural and "
            "alchemical space},"
            "journal = {Phys. Chem. Chem. Phys.},"
            "year = {2016},"
            "volume = {18},"
            "issue = {20},"
            "pages = {13754-13769},"
            "publisher = {The Royal Society of Chemistry},"
            "doi = {10.1039/C6CP00415F},"
            "url = {http://dx.doi.org/10.1039/C6CP00415F},}",
            '@article{dscribe, '
            'author = {Himanen, Lauri and J{\\"a}ger, Marc O.~J. and '
            'Morooka, Eiaki V. and Federici Canova, Filippo and Ranawat, '
            'Yashasvi S. and Gao, David Z. and Rinke, Patrick and Foster, '
            'Adam S.}, '
            'title = {{DScribe: Library of descriptors for machine '
            'learning in materials science}}, '
            'journal = {Computer Physics Communications}, '
            'year = {2019}, pages = {106949}, '
            'doi = {https://doi.org/10.1016/j.cpc.2019.106949}}'
        ]

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Lauri Himanen and the DScribe team", "Alex Dunn"]
Exemple #13
0
import ase, pickle
import numpy as np
from dscribe.utils import AverageKernel
from ase.build import molecule
from ase.collections import g2
import time

# Choose descriptor: the branches below handle the values "SOAP" and "ACSF".
descriptor = "SOAP"

# Compute local descriptors
# List passed to SOAP as its first positional argument.
# NOTE(review): presumably atomic numbers (1, 6) of the species to describe —
# confirm against the SOAP constructor signature.
all_atomtypes = [1, 6]
#all_atomtypes = []
# NOTE(review): neither SOAP nor ACSF is imported in the import lines visible
# above this script; confirm they are brought into scope elsewhere.
if descriptor == "SOAP":
    desc = SOAP(all_atomtypes, 8.0, 2, 0, periodic=False, crossover=True)
    # Report the length of the feature vector the descriptor will produce.
    print(desc.get_number_of_features())
elif descriptor == "ACSF":
    # The numeric parameter lists below look like placeholder values.
    # NOTE(review): confirm they are intentional before relying on the output.
    desc = ACSF(n_atoms_max=15,
                types=[1, 6, 7, 8],
                bond_params=[[
                    1,
                    2,
                ], [
                    4,
                    5,
                ]],
                bond_cos_params=[1, 2, 3, 4],
                ang4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                ang5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                flatten=False)
else:
    def test_numerical(self):
        """Compare the numerical derivatives against a naive Python version.

        For every combination of radial basis and averaging mode a two-point
        central finite difference is accumulated in Python by displacing each
        atom along each cartesian axis, and the result is compared with what
        ``soap.derivatives(..., method="numerical")`` returns.
        """
        # Elaborate test system: several species, a non-cubic cell and atoms
        # that sit close to each other.
        a = 1
        unit_cell = Atoms(
            symbols=["C", "H", "O"],
            cell=[[0, a, a], [a, 0, a], [a, a, 0]],
            scaled_positions=[
                [0, 0, 0],
                [1 / 3, 1 / 3, 1 / 3],
                [2 / 3, 2 / 3, 2 / 3],
            ],
            pbc=[True, True, True],
        )
        system = unit_cell * (3, 3, 3)

        # Two centers: one in the middle of the cell, one on its edge.
        cell_middle = np.sum(system.get_cell(), axis=0) / 2
        centers = [cell_middle, [0, 0, 0]]

        h = 0.0001
        n_atoms = len(system)
        n_comp = 3

        # Central difference stencil as (coefficient, displacement sign)
        # pairs, evaluated in this order.
        stencil = [(-1.0 / 2.0, -1.0), (1.0 / 2.0, 1.0)]

        # The maximum error depends on the system size. For a small system
        # the non-periodic error is smaller than the periodic one; they
        # become equal (~1e-5) once the system is made larger. Only the
        # non-periodic case is exercised here.
        for periodic in [False]:
            for rbf in ["gto", "polynomial"]:
                for average in ["off", "outer", "inner"]:
                    averaged = average != "off"
                    soap = SOAP(
                        species=[1, 8, 6],
                        rcut=3,
                        nmax=4,
                        lmax=4,
                        rbf=rbf,
                        sparse=False,
                        average=average,
                        crossover=True,
                        periodic=periodic,
                        # The numerical derivatives require double precision.
                        dtype="float64",
                    )
                    n_features = soap.get_number_of_features()

                    # With averaging all centers are combined into one
                    # output, so the reference array has no center axis.
                    if averaged:
                        n_centers = 1
                        reference_shape = (n_atoms, n_comp, n_features)
                    else:
                        n_centers = len(centers)
                        reference_shape = (
                            n_centers, n_atoms, n_comp, n_features)
                    derivatives_python = np.zeros(reference_shape)

                    d0 = soap.create(system, centers)

                    for i_atom in range(n_atoms):
                        for i_center in range(n_centers):
                            # Averaged output always uses every center;
                            # otherwise each center is evaluated separately.
                            if averaged:
                                i_cent = centers
                            else:
                                i_cent = [centers[i_center]]
                            for i_comp in range(n_comp):
                                for coeff, delta in stencil:
                                    system_disturbed = system.copy()
                                    i_pos = system_disturbed.get_positions()
                                    i_pos[i_atom, i_comp] += h * delta
                                    system_disturbed.set_positions(i_pos)
                                    d1 = soap.create(system_disturbed, i_cent)
                                    if averaged:
                                        derivatives_python[
                                            i_atom, i_comp, :
                                        ] += coeff * d1 / h
                                    else:
                                        derivatives_python[
                                            i_center, i_atom, i_comp, :
                                        ] += coeff * d1[0, :] / h

                    # Calculate with the central finite difference
                    # implemented in C++, using the cartesian centers.
                    derivatives_cpp, d_cpp = soap.derivatives(
                        system, positions=centers, method="numerical")

                    # The descriptor returned alongside the derivatives must
                    # match a plain create() call.
                    descriptors_match = np.allclose(d0, d_cpp, atol=1e-6)
                    self.assertTrue(descriptors_match)

                    # Compare the derivative values.
                    derivatives_match = np.allclose(
                        derivatives_python, derivatives_cpp, atol=2e-5)
                    self.assertTrue(derivatives_match)
    def test_parallel(self):
        """Tests parallel output validity for both dense and sparse output.

        In every scenario the derivatives (and, where requested, the
        descriptors) computed for a list of systems with ``n_jobs=2`` are
        compared against the results of processing each system separately
        with ``n_jobs=1``. The serial reference is written into dense numpy
        arrays, so the private ``_sparse`` flag of the descriptor is switched
        off around the reference calls and restored afterwards.
        """
        for sparse in [False, True]:
            desc = SOAP(
                species=[1, 6, 7, 8],
                rcut=5,
                nmax=3,
                lmax=3,
                sigma=1,
                periodic=False,
                crossover=True,
                average="off",
                sparse=sparse,
            )
            n_features = desc.get_number_of_features()

            samples = [molecule("CO"), molecule("NO"), molecule("OH")]
            centers = [[0], [0], [0]]

            # NOTE: automatic job-count detection (n_jobs=-1, with or without
            # only_physical_cores) is not exercised by this test.

            # Perhaps most common scenario: more systems than jobs, using all
            # centers and indices
            der, des = desc.derivatives(
                system=samples,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 2, 2, 3, n_features))
            self.assertEqual(des.shape, (3, 2, n_features))
            assumed_der = np.empty((3, 2, 2, 3, n_features))
            assumed_des = np.empty((3, 2, n_features))
            # Use the same private flag as every other scenario below (this
            # one previously assigned to ``desc.sparse`` instead).
            desc._sparse = False
            for i, sample in enumerate(samples):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # More systems than jobs, using all centers and indices, not
            # requesting descriptors
            der = desc.derivatives(
                system=samples,
                return_descriptor=False,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 2, 2, 3, n_features))
            assumed_der = np.empty((3, 2, 2, 3, n_features))
            desc._sparse = False
            for i, sample in enumerate(samples):
                assumed_der[i, :] = desc.derivatives(
                    sample, n_jobs=1, return_descriptor=False)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
            self.assertTrue(np.allclose(assumed_der, der))

            # More systems than jobs, using custom indices as centers
            der, des = desc.derivatives(
                system=samples,
                positions=centers,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 1, 2, 3, n_features))
            self.assertEqual(des.shape, (3, 1, n_features))
            assumed_der = np.empty((3, 1, 2, 3, n_features))
            assumed_des = np.empty((3, 1, n_features))
            desc._sparse = False
            for i, (sample, sample_centers) in enumerate(
                    zip(samples, centers)):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, sample_centers, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # More systems than jobs, using custom cartesian centers
            centers = [[[0, 1, 2]], [[2, 1, 0]], [[1, 2, 0]]]
            der, des = desc.derivatives(
                system=samples,
                positions=centers,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 1, 2, 3, n_features))
            self.assertEqual(des.shape, (3, 1, n_features))
            assumed_der = np.empty((3, 1, 2, 3, n_features))
            assumed_des = np.empty((3, 1, n_features))
            desc._sparse = False
            for i, (sample, sample_centers) in enumerate(
                    zip(samples, centers)):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, sample_centers, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # Includes
            includes = [[0], [0], [0]]
            der, des = desc.derivatives(
                system=samples,
                include=includes,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 2, 1, 3, n_features))
            self.assertEqual(des.shape, (3, 2, n_features))
            assumed_der = np.empty((3, 2, 1, 3, n_features))
            assumed_des = np.empty((3, 2, n_features))
            desc._sparse = False
            for i, (sample, sample_include) in enumerate(
                    zip(samples, includes)):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, include=sample_include, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # Excludes
            excludes = [[0], [0], [0]]
            der, des = desc.derivatives(
                system=samples,
                exclude=excludes,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 2, 1, 3, n_features))
            self.assertEqual(des.shape, (3, 2, n_features))
            assumed_der = np.empty((3, 2, 1, 3, n_features))
            assumed_des = np.empty((3, 2, n_features))
            desc._sparse = False
            for i, (sample, sample_exclude) in enumerate(
                    zip(samples, excludes)):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, exclude=sample_exclude, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # Test averaged output
            desc.average = "inner"
            positions = [[0], [0, 1], [1]]
            der, des = desc.derivatives(
                system=samples,
                positions=positions,
                n_jobs=2,
            )
            self.assertEqual(der.shape, (3, 1, 2, 3, n_features))
            self.assertEqual(des.shape, (3, 1, n_features))
            assumed_der = np.empty((3, 1, 2, 3, n_features))
            assumed_des = np.empty((3, 1, n_features))
            desc._sparse = False
            for i, (sample, sample_positions) in enumerate(
                    zip(samples, positions)):
                assumed_der[i, :], assumed_des[i, :] = desc.derivatives(
                    sample, positions=sample_positions, n_jobs=1)
            desc._sparse = sparse
            if sparse:
                der = der.todense()
                des = des.todense()
            self.assertTrue(np.allclose(assumed_der, der))
            self.assertTrue(np.allclose(assumed_des, des))

            # Variable size list output, as the systems have a different size
            desc.average = "off"
            samples = [molecule("CO"), molecule("NO2"), molecule("OH")]
            der, des = desc.derivatives(
                system=samples,
                n_jobs=2,
            )
            self.assertIsInstance(der, list)
            self.assertEqual(der[0].shape, (2, 2, 3, n_features))
            self.assertEqual(der[1].shape, (3, 3, 3, n_features))
            self.assertEqual(der[2].shape, (2, 2, 3, n_features))
            desc._sparse = False
            for i, sample in enumerate(samples):
                assumed_der, assumed_des = desc.derivatives(sample, n_jobs=1)
                i_der = der[i]
                i_des = des[i]
                if sparse:
                    i_der = i_der.todense()
                    i_des = i_des.todense()
                self.assertTrue(np.allclose(assumed_der, i_der))
                self.assertTrue(np.allclose(assumed_des, i_des))
            desc._sparse = sparse
# Pull the 'temp' and 'load' columns out of `df`, which is bound earlier in
# the script (outside this excerpt).
temp = df['temp']
load = df['load']
print('Got training targets from spread sheet')

# Collect every chemical symbol that occurs in any entry of `atoms`
# (`atoms` is also bound outside this excerpt).
species = set()
for i in range(len(atoms)):
    species.update(atoms[i].get_chemical_symbols())

# SOAP descriptor over the collected species, configured with
# average="outer".
soap = SOAP(species=species,
            periodic=True,
            rcut=5,
            nmax=1,
            lmax=1,
            average="outer")
# NOTE(review): this message is printed before the descriptors are computed,
# and no model training happens in the lines visible here.
print('Training hardness model...')
# NOTE(review): the return value is discarded, so this call appears to have
# no effect — confirm whether it was meant to be printed or stored.
soap.get_number_of_features()

# Compute the descriptors for all entries of `atoms` with a single job.
feature_vectors = soap.create(atoms, n_jobs=1)
# NOTE(review): `th` is presumably the torch module — confirm against the
# imports at the top of the script.
feature_tensor = th.tensor(feature_vectors)
#print('DONE, SOAP descriptors ready to use')
feature_pd = pd.DataFrame(feature_vectors)  # DataFrame view of the SOAP descriptors

# Generate compositional descriptors.
# NOTE: this rebinds `df`; `temp` and `load` above were taken from the
# previously bound frame. The absolute path is specific to one machine.
df = pd.read_excel(
    '/Users/ziyanzhang/Downloads/dscribe/examples/hv_temp_cif_labels.xlsx')


class Vectorize_Formula:
    def __init__(self):
        elem_dict = pd.read_excel(
            '/Users/ziyanzhang/Desktop/subgroup/elementsnew.xlsx'