def test_positions(self):
    """Check the feature axis of ``create`` for several ``positions`` forms.

    For each of the four (periodic, crossover) combinations the descriptor is
    rebuilt and the second axis of the output is compared with
    ``get_number_of_features()`` for a nested-list position, a flat-list
    position, and no explicit positions. A string position must raise
    ``ValueError``; that is only checked on the last configuration.
    """
    # (periodic, crossover) pairs, in the order they are exercised.
    configurations = (
        (False, True),
        (True, True),
        (True, False),
        (False, False),
    )
    for is_periodic, use_crossover in configurations:
        desc = SOAP(
            [1, 6, 8], 10.0, 2, 0,
            periodic=is_periodic,
            crossover=use_crossover,
        )
        # Nested list, flat list, and "no positions given".
        for create_kwargs in (
            {"positions": [[0, 0, 0]]},
            {"positions": [0]},
            {},
        ):
            self.assertEqual(
                desc.get_number_of_features(),
                desc.create(H2O, **create_kwargs).shape[1],
            )

    # ``desc`` is still the last configuration (periodic=False, crossover=False).
    with self.assertRaises(ValueError):
        desc.create(H2O, positions=['a'])
def createDescriptorsAllSOAP(data, species, sigma_SOAP, cutoff_SOAP, nmax_SOAP, lmax_SOAP, periodic, sparse_SOAP=default_sparse_SOAP):
    """Compute SOAP descriptors for every structure in ``data``, grouped by atom.

    Args:
        data: Indexable sequence of structures; ``np.shape(data[0])[0]`` is
            taken as the number of atoms and ``len(data)`` as the number of
            steps.
        species: Forwarded to ``SOAP(species=...)``.
        sigma_SOAP: Forwarded to ``SOAP(sigma=...)``.
        cutoff_SOAP: Forwarded to ``SOAP(rcut=...)``.
        nmax_SOAP: Forwarded to ``SOAP(nmax=...)``.
        lmax_SOAP: Forwarded to ``SOAP(lmax=...)``.
        periodic: Forwarded to ``SOAP(periodic=...)``.
        sparse_SOAP: Forwarded to ``SOAP(sparse=...)``.

    Returns:
        A list with one entry per atom; each entry is the
        ``(n_steps, n_features)`` slice of an object-dtype array.
    """
    # Set up the descriptor generator.
    descriptor_builder = SOAP(
        species=species,
        sigma=sigma_SOAP,
        periodic=periodic,
        rcut=cutoff_SOAP,
        nmax=nmax_SOAP,
        lmax=lmax_SOAP,
        sparse=sparse_SOAP,
    )

    # Dimensions of the output buffer.
    feature_count = descriptor_builder.get_number_of_features()
    atom_count = np.shape(data[0])[0]
    step_count = len(data)

    # One column of the buffer is filled per structure.
    per_atom = np.empty((atom_count, step_count, feature_count), dtype=object)
    for step in tqdm.tqdm(range(step_count)):
        per_atom[:, step, :] = descriptor_builder.create(data[step])

    # Hand back one (n_steps, n_features) slice per atom.
    return [per_atom[atom_index, :, :] for atom_index in range(atom_count)]
def test_number_of_features(self):
    """Verify the reported feature count against a closed form and the output."""
    lmax = 5
    nmax = 5
    n_elems = 2
    desc = SOAP(species=[1, 8], rcut=3, nmax=nmax, lmax=lmax, periodic=True)

    # Reported count versus the closed-form expectation.
    reported = desc.get_number_of_features()
    n_channels = nmax * n_elems
    expected = int((lmax + 1) * n_channels * (n_channels + 1) / 2)
    self.assertEqual(reported, expected)

    # Reported count versus the width of an actual descriptor.
    reported = desc.get_number_of_features()
    output = desc.create(H2O)
    self.assertEqual(reported, output.shape[1])
def test_properties(self):
    """Check that reassigning ``species`` changes the descriptor size.

    The feature count and the output width are recorded before and after the
    ``species`` attribute is replaced; both must differ.
    """
    # Start with a hydrogen/oxygen setup.
    initial_setup = {
        "species": [1, 8],
        "rcut": 3,
        "nmax": 3,
        "lmax": 3,
        "sparse": False,
    }
    a = SOAP(**initial_setup)
    count_before = a.get_number_of_features()
    output_before = a.create(H2O)

    # Swap the species through the attribute and measure again.
    a.species = ["C", "H", "O"]
    count_after = a.get_number_of_features()
    output_after = a.create(molecule("CH3OH"))

    self.assertNotEqual(count_before, count_after)
    self.assertNotEqual(output_before.shape[1], output_after.shape[1])
def test_positions(self):
    """Check the full output shape of ``create`` for several ``positions`` forms.

    For each of the four (periodic, crossover) combinations the output shape
    is compared with ``(n_rows, get_number_of_features())``. The forms tried
    are a nested position as array and as list (one row), three flat entries
    as list and as array (three rows), and no positions (three rows). A
    string position must raise ``ValueError``; that is only checked on the
    last configuration.
    """
    # (periodic, crossover) pairs, in the order they are exercised.
    configurations = (
        (False, True),
        (True, True),
        (True, False),
        (False, False),
    )
    for is_periodic, use_crossover in configurations:
        desc = SOAP(
            species=[1, 6, 8],
            rcut=10.0,
            nmax=2,
            lmax=0,
            periodic=is_periodic,
            crossover=use_crossover,
        )
        n_feat = desc.get_number_of_features()

        # (expected number of rows, keyword arguments for create); rebuilt
        # for every configuration so each call receives fresh arguments.
        requests = (
            (1, {"positions": np.array([[0, 0, 0]])}),
            (1, {"positions": [[0, 0, 0]]}),
            (3, {"positions": [0, 1, 2]}),
            (3, {"positions": np.array([0, 1, 2])}),
            (3, {}),
        )
        for n_rows, create_kwargs in requests:
            self.assertEqual(
                (n_rows, n_feat),
                desc.create(H2O, **create_kwargs).shape,
            )

    # ``desc`` is still the last configuration (periodic=False, crossover=False).
    with self.assertRaises(ValueError):
        desc.create(H2O, positions=['a'])
def backend(sn, soap_mask, tracer_atomic_number, environment_list):
    """Build a DScribe-backed SOAP callable for the given environment species.

    The SOAP hyper-parameters are read from ``soap_opts``, which must be
    available in the enclosing scope (it is not defined in this function).

    Args:
        sn: Not used by this backend.
        soap_mask: Not used by this backend.
        tracer_atomic_number: Not used by this backend.
        environment_list: Forwarded to ``SOAP(species=...)``.

    Returns:
        A function ``dscribe_soap(structure, positions)`` returning the dense
        descriptor cast to double precision. The function carries an
        ``n_dim`` attribute holding ``soap.get_number_of_features()``.
    """
    soap = SOAP(
        species=environment_list,
        crossover=soap_opts['crossover'],
        rcut=soap_opts['cutoff'],
        nmax=soap_opts['n_max'],
        lmax=soap_opts['l_max'],
        rbf=soap_opts['rbf'],
        sigma=soap_opts['atom_sigma'],
        periodic=soap_opts['periodic'],
        sparse=False,
    )

    def dscribe_soap(structure, positions):
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy; the builtin gives the same float64 cast and
        # works on every NumPy version.
        return soap.create(structure, positions=positions).astype(float)

    dscribe_soap.n_dim = soap.get_number_of_features()
    return dscribe_soap
class SOAPComputer(torch.nn.Module):
    """Torch module that evaluates a SOAP descriptor for batches of geometries.

    All keyword arguments given to the constructor are forwarded unchanged to
    ``SOAP``. ``len(module)`` is the number of SOAP features.
    """

    def __init__(self, **config):
        super().__init__()
        self.soap = SOAP(**config)

    def __len__(self):
        """Return the number of features reported by the wrapped descriptor."""
        return self.soap.get_number_of_features()

    def forward(self, species_coordinates):
        """Compute descriptors for one or several geometries of one molecule.

        Args:
            species_coordinates: A ``(species, coordinates)`` pair. A
                two-dimensional ``coordinates`` is treated as a single
                geometry and gets a leading batch axis; otherwise the first
                axis is iterated as the batch.

        Returns:
            A ``torch.float64`` tensor stacking one descriptor per geometry
            along a new leading axis.
        """
        species, coordinates = species_coordinates
        if len(coordinates.shape) == 2:
            coordinates = coordinates[None, :]

        # One Atoms object is built from the first geometry and reused; only
        # its positions are replaced for every subsequent geometry.
        mol = Atoms(species, coordinates[0])

        descriptors = []
        for coords in coordinates:
            mol.set_positions(coords)
            # Convert frame by frame: handing torch.tensor a Python list of
            # ndarrays triggers PyTorch's slow list-of-arrays path (and a
            # warning); stacking per-frame tensors gives the same result.
            descriptors.append(
                torch.tensor(self.soap.create(mol), dtype=torch.float64)
            )
        return torch.stack(descriptors)
def test_parallel_sparse(self):
    """Compare multi-system sparse output with per-system reference rows.

    The multi-system ``create`` call is run serially and with two jobs, with
    index positions, without positions, and with nested-list positions. Each
    result is densified and compared against rows built from single-center
    calls. The last section switches ``_average`` on and compares against
    the hand-computed mean.
    """
    samples = [molecule("CO"), molecule("N2O")]
    desc = SOAP(
        species=[6, 7, 8],
        rcut=5,
        nmax=3,
        lmax=3,
        sigma=1,
        periodic=False,
        crossover=True,
        average=False,
        sparse=True,
    )
    n_features = desc.get_number_of_features()

    def reference_rows(requests):
        """Stack one single-center descriptor per (system index, center)."""
        rows = np.empty((len(requests), n_features))
        for i_row, (i_system, center) in enumerate(requests):
            rows[i_row, :] = desc.create(samples[i_system], [center]).toarray()
        return rows

    # Index positions, serial job.
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=1,
    ).toarray()
    assumed = reference_rows([(0, 0), (1, 0), (1, 1)])
    self.assertTrue(np.allclose(output, assumed))

    # Index positions, two jobs.
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=2,
    ).toarray()
    assumed = reference_rows([(0, 0), (1, 0), (1, 1)])
    self.assertTrue(np.allclose(output, assumed))

    # No positions specified: every atom of every system becomes a row.
    output = desc.create(
        system=samples,
        positions=[None, None],
        n_jobs=2,
    ).toarray()
    assumed = reference_rows([(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)])
    self.assertTrue(np.allclose(output, assumed))

    # Cartesian positions.
    output = desc.create(
        system=samples,
        positions=[[[0, 0, 0], [1, 2, 0]], [[1, 2, 0]]],
        n_jobs=2,
    ).toarray()
    assumed = reference_rows([(0, [0, 0, 0]), (0, [1, 2, 0]), (1, [1, 2, 0])])
    self.assertTrue(np.allclose(output, assumed))

    # Averaged output: one row per system.
    desc._average = True
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=2,
    ).toarray()
    assumed = np.empty((2, n_features))
    assumed[0, :] = desc.create(samples[0], [0]).toarray()
    assumed[1, :] = 1/2*(desc.create(samples[1], [0]).toarray() + desc.create(samples[1], [1]).toarray())
    self.assertTrue(np.allclose(output, assumed))
def _transform_particle_groups(descriptors, transformers, n_out, split=2):
    """Transform the first ``split`` particles and the remaining ones separately.

    ``transformers[0]`` is applied to particles ``[:split]`` and
    ``transformers[1]`` to particles ``[split:]``. Each group is flattened to
    ``(frames * particles_in_group, n_features)`` for the ``transform`` call
    and folded back afterwards. The result is a new
    ``(frames, particles, n_out)`` array.
    """
    n_frames, n_particles, n_in = descriptors.shape
    transformed = np.empty((n_frames, n_particles, n_out))
    groups = (
        (slice(None, split), transformers[0]),
        (slice(split, None), transformers[1]),
    )
    for particle_slice, transformer in groups:
        flat = descriptors[:, particle_slice, :].reshape(-1, n_in)
        transformed[:, particle_slice, :] = transformer.transform(flat).reshape(
            n_frames, -1, n_out)
    return transformed


def create_data_SOAP(data, metadata):
    """Attach SOAP descriptors (optionally PCA-reduced and scaled) to ``data``.

    Args:
        data: Table indexed by time step. Column ``'molec'`` holds the
            structure of each step; column ``'is_train'`` is created with
            ``create_is_train`` when it is missing.
        metadata: Mapping with the keys ``'particles'``, ``'scaler'``,
            ``'test_size'``, ``'rcut'``, ``'nmax'``, ``'lmax'``, ``'N_PCA'``
            and ``'sigma_SOAP'``. It is updated in place: ``'scaler'`` is
            always written back, and ``'PCAs'`` / ``'old_N_feature'`` are
            written when the PCAs are fitted by this call.

    Returns:
        ``(data joined with a 'descriptor' column, metadata)``.

    Notes:
        * The first two particles and the remaining particles are handled by
          separate PCA / scaler objects (index 0 and 1 respectively).
        * ``metadata['scaler']``: ``False`` disables scaling, ``None`` fits
          new scalers through ``scale_descriptors``, anything else is used as
          a pair of already fitted scalers.
    """
    # 'test_size' is read so a missing key still fails here, but is unused.
    particles, scaler, test_size, rcut, nmax, lmax, N_PCA, sigma_SOAP = [
        metadata[x] for x in [
            'particles', 'scaler', 'test_size', 'rcut', 'nmax', 'lmax',
            'N_PCA', 'sigma_SOAP'
        ]
    ]

    soap = SOAP(
        species=np.unique(particles),
        sigma=sigma_SOAP,
        periodic=False,
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
        sparse=False,
    )
    nb_features = soap.get_number_of_features()

    n_frames = data.index.max() + 1
    n_particles = len(particles)

    # ``pd.np`` no longer exists in current pandas; use NumPy directly.
    descriptors = np.empty((n_frames, n_particles, nb_features))
    centers = np.arange(n_particles)
    for i_time in tqdm.tqdm(range(n_frames)):
        descriptors[i_time] = soap.create(data['molec'][i_time],
                                          positions=centers)

    # Create the train/test flag only when the caller did not provide one.
    try:
        data['is_train']
    except KeyError:
        data['is_train'] = create_is_train(n_frames)

    # Optional dimensionality reduction.
    if N_PCA:
        try:
            PCAs = metadata['PCAs']
        except KeyError:
            # No stored PCAs: fit them on the training rows only.
            PCAs = select_best_params(descriptors[data['is_train'].values],
                                      nb_features, N_PCA)
            fitted_here = True
        else:
            fitted_here = False

        descriptors = _transform_particle_groups(descriptors, PCAs, N_PCA)
        if fitted_here:
            # Recorded only after the transform succeeded, as before.
            metadata['old_N_feature'] = nb_features
            metadata['PCAs'] = PCAs
        nb_features = N_PCA

    # Scaling. ``== False`` (not ``is False``) is kept on purpose so that
    # falsy flags such as 0 keep disabling scaling exactly as before.
    if scaler is None:
        descriptors, scaler = scale_descriptors(data, descriptors)
    elif scaler == False:  # noqa: E712
        pass
    else:
        descriptors = _transform_particle_groups(descriptors, scaler,
                                                 nb_features)

    metadata['scaler'] = scaler
    return data.join(pd.DataFrame({'descriptor': list(descriptors)})), metadata
# Create SOAP output for the system soap_water = soap.create(water, positions=[0]) print(soap_water) print(soap_water.shape) # Create output for multiple system samples = [molecule("H2O"), molecule("NO2"), molecule("CO2")] positions = [[0], [1, 2], [1, 2]] coulomb_matrices = soap.create(samples, positions) # Serial coulomb_matrices = soap.create(samples, positions, n_jobs=2) # Parallel # Lets change the SOAP setup and see how the number of features changes small_soap = SOAP(species=species, rcut=rcut, nmax=2, lmax=0) big_soap = SOAP(species=species, rcut=rcut, nmax=9, lmax=9) n_feat1 = small_soap.get_number_of_features() n_feat2 = big_soap.get_number_of_features() print(n_feat1, n_feat2) # Periodic systems from ase.build import bulk copper = bulk('Cu', 'fcc', a=3.6, cubic=True) print(copper.get_pbc()) periodic_soap = SOAP(species=[29], rcut=rcut, nmax=nmax, lmax=nmax, periodic=True, sparse=False)
def test_parallel_sparse(self):
    """Compare multi-system sparse output with per-center reference values.

    Fixed-size requests (the same number of centers for every system) are
    densified and compared with a ``(systems, centers, features)`` block
    built from single-center calls. A variable-size request is compared
    entry by entry. The last section switches ``average`` to ``"outer"`` and
    compares against the hand-computed mean.
    """
    samples = [molecule("CO"), molecule("NO")]
    desc = SOAP(
        species=[6, 7, 8],
        rcut=5,
        nmax=3,
        lmax=3,
        sigma=1,
        periodic=False,
        crossover=True,
        average="off",
        sparse=True,
    )
    n_features = desc.get_number_of_features()

    def reference_block(centers_per_system):
        """Fill a (systems, centers, features) block from single-center calls."""
        block = np.empty(
            (len(centers_per_system), len(centers_per_system[0]), n_features))
        for i_system, centers in enumerate(centers_per_system):
            for i_center, center in enumerate(centers):
                block[i_system, i_center] = desc.create(
                    samples[i_system], [center]).todense()
        return block

    # (positions argument, n_jobs, centers used for the reference block)
    fixed_size_cases = [
        # Multiple systems, serial job
        ([[0, 1], [0, 1]], 1, [[0, 1], [0, 1]]),
        # Multiple systems, parallel job
        ([[0, 1], [0, 1]], 2, [[0, 1], [0, 1]]),
        # Multiple systems, parallel job, all atoms
        ([None, None], 2, [[0, 1], [0, 1]]),
        # Multiple systems, parallel job, cartesian positions
        ([[[0, 0, 0]], [[1, 2, 0]]], 2, [[[0, 0, 0]], [[1, 2, 0]]]),
    ]
    for requested, n_jobs, reference_centers in fixed_size_cases:
        output = desc.create(
            system=samples,
            positions=requested,
            n_jobs=n_jobs,
        ).todense()
        assumed = reference_block(reference_centers)
        self.assertTrue(np.allclose(output, assumed))

    # Multiple systems, parallel job, indices, variable size: the result is
    # indexed per system and per center instead of being densified at once.
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=2,
    )
    for i_system, i_center in ((0, 0), (1, 0), (1, 1)):
        self.assertTrue(
            np.allclose(
                output[i_system][i_center].todense(),
                desc.create(samples[i_system], [i_center]).todense(),
            )
        )

    # Averaged output: one row per system.
    desc.average = "outer"
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=2,
    ).todense()
    assumed = np.empty((2, n_features))
    assumed[0] = desc.create(samples[0], [0]).todense()
    assumed[1] = (
        1 / 2 * (
            desc.create(samples[1], [0]).todense()
            + desc.create(samples[1], [1]).todense()
        )
    )
    self.assertTrue(np.allclose(output, assumed))
class SOAP(BaseFeaturizer):
    r"""
    Smooth overlap of atomic positions (interface via DScribe).

    Class for generating a partial power spectrum from Smooth Overlap of
    Atomic Positions (SOAP). This implementation uses real (tesseral)
    spherical harmonics as the angular basis set and provides two
    orthonormalized alternatives for the radial basis functions: spherical
    primitive gaussian type orbitals ("gto") or the polynomial basis set
    ("polynomial"). By default the faster gto-basis is used. Please see the
    DScribe SOAP documentation for more details.

    Note that SOAP is only featurized for elements identified by "fit" (see
    following), thus "fit" must be called before "featurize", or else an
    error will be raised.

    Based originally on the following publications:

    "On representing chemical environments", Albert P. Bartók, Risi
        Kondor, and Gábor Csányi, Phys. Rev. B 87, 184115, (2013),
        https://doi.org/10.1103/PhysRevB.87.184115

    "Comparing molecules and solids across structural and alchemical
        space", Sandip De, Albert P. Bartók, Gábor Csányi and Michele
        Ceriotti, Phys. Chem. Chem. Phys. 18, 13754 (2016),
        https://doi.org/10.1039/c6cp00415f

    Implementation (and some documentation) originally based on DScribe:
    https://github.com/SINGROUP/dscribe.

    "DScribe: Library of descriptors for machine learning in materials
        science", Himanen, L., J{\"a}ger, M. O.J., Morooka, E. V., Federici
        Canova, F., Ranawat, Y. S., Gao, D. Z., Rinke, P. and Foster, A. S.
        Computer Physics Communications, 106949 (2019),
        https://doi.org/10.1016/j.cpc.2019.106949

    Args:
        rcut (float): A cutoff for local region in angstroms. Should be
            bigger than 1 angstrom.
        nmax (int): The number of radial basis functions.
        lmax (int): The maximum degree of spherical harmonics.
        sigma (float): The standard deviation of the gaussians used to expand
            the atomic density.
        rbf (str): The radial basis functions to use. The available options
            are:

            * "gto": Spherical gaussian type orbitals defined as
              :math:`g_{nl}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'l} r^l e^{-\alpha_{n'l}r^2}`
            * "polynomial": Polynomial basis defined as
              :math:`g_{n}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'} (r-r_\mathrm{cut})^{n'+2}`

        periodic (bool): Determines whether the system is considered to be
            periodic.
        crossover (bool): Determines if crossover of atomic types should
            be included in the power spectrum. If enabled, the power
            spectrum is calculated over all unique species combinations Z
            and Z'. If disabled, the power spectrum does not contain
            cross-species information and is only run over each unique
            species Z. Turned on by default to correspond to the original
            definition
    """

    @requires(
        dscribe,
        "SOAPFeaturizer requires DScribe. Install from github.com/SINGROUP/dscribe"
    )
    def __init__(
            self,
            rcut,
            nmax,
            lmax,
            sigma,
            periodic,
            rbf="gto",
            crossover=True,
    ):
        self.rcut = rcut
        self.nmax = nmax
        self.lmax = lmax
        self.sigma = sigma
        self.rbf = rbf
        self.periodic = periodic
        self.crossover = crossover
        self.adaptor = AseAtomsAdaptor()
        # Everything below is populated by ``fit``.
        self.length = None
        self.atomic_numbers = None
        self.elements_sorted = None
        self.soap = None
        self.n_elements = None

    def _check_fitted(self):
        """Raise ``NotFittedError`` unless ``fit`` has created the descriptor."""
        if self.soap is None:
            raise NotFittedError("Please fit SOAP before featurizing.")

    def fit(self, X, y=None):
        """
        Fit the SOAP featurizer to a dataframe.

        Collects the atomic numbers of all elements occurring in ``X`` and
        builds the underlying DScribe descriptor for exactly those species.

        Args:
            X ([SiteCollection]): For example, a list of pymatgen Structures.
            y : unused (added for consistency with overridden method signature)

        Returns:
            self

        Raises:
            TypeError: If any entry of ``X`` is not a pymatgen ``Structure``.
        """
        # Validate and collect in a single pass; no attribute is modified
        # until every entry has been checked.
        elements = set()
        for struct in X:
            if not isinstance(struct, Structure):
                raise TypeError(
                    "This fit requires an array-like input of Pymatgen "
                    "Structures and sites!")
            elements.update(e.Z for e in struct.composition.elements)

        self.elements_sorted = sorted(elements)
        self.atomic_numbers = elements
        self.soap = SOAP_dscribe(species=self.atomic_numbers,
                                 rcut=self.rcut,
                                 nmax=self.nmax,
                                 lmax=self.lmax,
                                 sigma=self.sigma,
                                 rbf=self.rbf,
                                 periodic=self.periodic,
                                 crossover=self.crossover,
                                 average=False,
                                 sparse=False)
        self.length = self.soap.get_number_of_features()
        return self

    def featurize(self, struct, idx):
        """Return the SOAP vector (as a list) centered on site ``idx`` of ``struct``."""
        self._check_fitted()
        s_ase = self.adaptor.get_atoms(struct)
        return self.soap.create(s_ase, positions=[idx]).tolist()[0]

    def feature_labels(self):
        """
        Return one label per SOAP feature.

        For a species pair with Z == Z' the power spectrum is symmetric in
        (n, n'), so only n' >= n is listed. For Z != Z' every (n, n')
        combination is a distinct feature and all of them are listed. With
        ``crossover=False`` only the Z == Z' blocks exist. This makes the
        number of labels agree with the number of features of the
        descriptor.

        NOTE(review): the labels are emitted in (Z, Z', l, n, n') order,
        which is assumed to be the layout DScribe uses for its output --
        verify against the installed DScribe version.
        """
        self._check_fitted()
        labels = []
        for i, zi in enumerate(self.elements_sorted):
            # Unique pairs only (Z' >= Z); without crossover just (Z, Z).
            partners = self.elements_sorted[i:] if self.crossover else [zi]
            for zj in partners:
                for l in range(self.lmax + 1):
                    for ni in range(self.nmax):
                        first_nj = ni if zj == zi else 0
                        for nj in range(first_nj, self.nmax):
                            labels.append(
                                "Z={},Z'={},l={},n={},n'={}".format(
                                    zi, zj, l, ni, nj))
        return labels

    def citations(self):
        """Return the BibTeX entries of the SOAP and DScribe publications."""
        # Backslashes of the LaTeX accents are escaped so they survive in
        # the emitted BibTeX (a bare \' or \" would be swallowed by Python).
        return [
            "@article{PhysRevB.87.184115,"
            "title = {On representing chemical environments},"
            "author = {Bart\\'ok, Albert P. and Kondor, Risi and Cs\\'anyi, "
            "G\\'abor},"
            "journal = {Phys. Rev. B},"
            "volume = {87},"
            "issue = {18},"
            "pages = {184115},"
            "numpages = {16},"
            "year = {2013},"
            "month = {May},"
            "publisher = {American Physical Society},"
            "doi = {10.1103/PhysRevB.87.184115},"
            "url = {https://link.aps.org/doi/10.1103/PhysRevB.87.184115}}",
            "@Article{C6CP00415F,"
            "author ={De, Sandip and Bartók, Albert P. and Csányi, Gábor"
            " and Ceriotti, Michele},"
            "title  ={Comparing molecules and solids across structural and "
            "alchemical space},"
            "journal = {Phys. Chem. Chem. Phys.},"
            "year = {2016},"
            "volume = {18},"
            "issue = {20},"
            "pages = {13754-13769},"
            "publisher = {The Royal Society of Chemistry},"
            "doi = {10.1039/C6CP00415F},"
            "url = {http://dx.doi.org/10.1039/C6CP00415F},}",
            '@article{dscribe, '
            'author = {Himanen, Lauri and J{\\"a}ger, Marc O.~J. and '
            'Morooka, Eiaki V. and Federici Canova, Filippo and Ranawat, '
            'Yashasvi S. and Gao, David Z. and Rinke, Patrick and Foster, '
            'Adam S.}, '
            'title = {{DScribe: Library of descriptors for machine '
            'learning in materials science}}, '
            'journal = {Computer Physics Communications}, '
            'year = {2019}, pages = {106949}, '
            'doi = {https://doi.org/10.1016/j.cpc.2019.106949}}'
        ]

    def implementors(self):
        """Return the names of the people who implemented this featurizer."""
        return ["Lauri Himanen and the DScribe team", "Alex Dunn"]
import ase, pickle import numpy as np from dscribe.utils import AverageKernel from ase.build import molecule from ase.collections import g2 import time # Choose descriptor descriptor = "SOAP" # Compute local descriptors all_atomtypes = [1, 6] #all_atomtypes = [] if descriptor == "SOAP": desc = SOAP(all_atomtypes, 8.0, 2, 0, periodic=False, crossover=True) print(desc.get_number_of_features()) elif descriptor == "ACSF": desc = ACSF(n_atoms_max=15, types=[1, 6, 7, 8], bond_params=[[ 1, 2, ], [ 4, 5, ]], bond_cos_params=[1, 2, 3, 4], ang4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], ang5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], flatten=False) else:
def test_numerical(self): """Test numerical values against a naive python implementation.""" # Elaborate test system with multiple species, non-cubic cell, and # close-by atoms. a = 1 system = (Atoms( symbols=["C", "H", "O"], cell=[[0, a, a], [a, 0, a], [a, a, 0]], scaled_positions=[ [0, 0, 0], [1 / 3, 1 / 3, 1 / 3], [2 / 3, 2 / 3, 2 / 3], ], pbc=[True, True, True], ) * (3, 3, 3)) # view(system) # Two centers: one in the middle, one on the edge. centers = [np.sum(system.get_cell(), axis=0) / 2, [0, 0, 0]] h = 0.0001 n_atoms = len(system) n_comp = 3 # The maximum error depends on how big the system is. With a small # system the error is smaller for non-periodic systems than the # corresponding error when periodicity is turned on. The errors become # equal (~1e-5) when the size of the system is increased. for periodic in [False]: for rbf in ["gto", "polynomial"]: for average in ["off", "outer", "inner"]: soap = SOAP( species=[1, 8, 6], rcut=3, nmax=4, lmax=4, rbf=rbf, sparse=False, average=average, crossover=True, periodic=periodic, dtype= "float64", # The numerical derivatives require double precision ) n_features = soap.get_number_of_features() if average != "off": n_centers = 1 derivatives_python = np.zeros( (n_atoms, n_comp, n_features)) else: n_centers = len(centers) derivatives_python = np.zeros( (n_centers, n_atoms, n_comp, n_features)) d0 = soap.create(system, centers) coeffs = [-1.0 / 2.0, 1.0 / 2.0] deltas = [-1.0, 1.0] for i_atom in range(len(system)): for i_center in range(n_centers): for i_comp in range(3): for i_stencil in range(2): if average == "off": i_cent = [centers[i_center]] else: i_cent = centers system_disturbed = system.copy() i_pos = system_disturbed.get_positions() i_pos[i_atom, i_comp] += h * deltas[i_stencil] system_disturbed.set_positions(i_pos) d1 = soap.create(system_disturbed, i_cent) if average != "off": derivatives_python[ i_atom, i_comp, :] += (coeffs[i_stencil] * d1 / h) else: derivatives_python[ i_center, i_atom, i_comp, :] += 
(coeffs[i_stencil] * d1[0, :] / h) # Calculate with central finite difference implemented in C++. # Try both cartesian centers and indices. for c in [centers]: derivatives_cpp, d_cpp = soap.derivatives( system, positions=c, method="numerical") # Test that descriptor values are correct self.assertTrue(np.allclose(d0, d_cpp, atol=1e-6)) # Compare values # print(np.abs(derivatives_python).max()) # print(derivatives_python[0,1,:,:]) # print(derivatives_cpp[0,0,:,:]) self.assertTrue( np.allclose(derivatives_python, derivatives_cpp, atol=2e-5))
def test_parallel(self):
    """Tests parallel output validity for both dense and sparse output.

    Every section calls ``derivatives`` on several systems with two jobs and
    compares the result with reference values obtained by calling
    ``derivatives`` on each system separately with one job. The reference
    calls are made with sparse output switched off, and the original sparse
    setting is restored afterwards.
    """
    for sparse in [False, True]:
        desc = SOAP(
            species=[1, 6, 7, 8],
            rcut=5,
            nmax=3,
            lmax=3,
            sigma=1,
            periodic=False,
            crossover=True,
            average="off",
            sparse=sparse,
        )
        n_features = desc.get_number_of_features()

        samples = [molecule("CO"), molecule("NO"), molecule("OH")]
        centers = [[0], [0], [0]]

        # Determining number of jobs based on the amount of CPUs
        # desc.derivatives(system=samples, n_jobs=-1, only_physical_cores=False)
        # desc.derivatives(system=samples, n_jobs=-1, only_physical_cores=True)

        # Perhaps most common scenario: more systems than jobs, using all
        # centers and indices
        der, des = desc.derivatives(
            system=samples,
            n_jobs=2,
        )
        # Axes are presumably (systems, centers, atoms, xyz, features) --
        # TODO confirm against the derivatives documentation.
        self.assertTrue(der.shape == (3, 2, 2, 3, n_features))
        self.assertTrue(des.shape == (3, 2, n_features))
        assumed_der = np.empty((3, 2, 2, 3, n_features))
        assumed_des = np.empty((3, 2, n_features))
        # NOTE(review): this is the only place that assigns the public
        # ``sparse`` attribute; every later section writes ``_sparse``
        # directly -- confirm both spellings are intended.
        desc.sparse = False
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(samples[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(samples[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(samples[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # More systems than jobs, using all centers and indices, not requesting
        # descriptors
        der = desc.derivatives(
            system=samples,
            return_descriptor=False,
            n_jobs=2,
        )
        self.assertTrue(der.shape == (3, 2, 2, 3, n_features))
        assumed_der = np.empty((3, 2, 2, 3, n_features))
        desc._sparse = False
        assumed_der[0, :] = desc.derivatives(samples[0], n_jobs=1, return_descriptor=False)
        assumed_der[1, :] = desc.derivatives(samples[1], n_jobs=1, return_descriptor=False)
        assumed_der[2, :] = desc.derivatives(samples[2], n_jobs=1, return_descriptor=False)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
        self.assertTrue(np.allclose(assumed_der, der))

        # More systems than jobs, using custom indices as centers
        der, des = desc.derivatives(
            system=samples,
            positions=centers,
            n_jobs=2,
        )
        self.assertTrue(der.shape == (3, 1, 2, 3, n_features))
        self.assertTrue(des.shape == (3, 1, n_features))
        assumed_der = np.empty((3, 1, 2, 3, n_features))
        assumed_des = np.empty((3, 1, n_features))
        desc._sparse = False
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(samples[0], centers[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(samples[1], centers[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(samples[2], centers[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # More systems than jobs, using custom cartesian centers
        centers = [[[0, 1, 2]], [[2, 1, 0]], [[1, 2, 0]]]
        der, des = desc.derivatives(
            system=samples,
            positions=centers,
            n_jobs=2,
        )
        self.assertTrue(der.shape == (3, 1, 2, 3, n_features))
        self.assertTrue(des.shape == (3, 1, n_features))
        assumed_der = np.empty((3, 1, 2, 3, n_features))
        assumed_des = np.empty((3, 1, n_features))
        desc._sparse = False
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(samples[0], centers[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(samples[1], centers[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(samples[2], centers[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # Includes
        includes = [[0], [0], [0]]
        der, des = desc.derivatives(
            system=samples,
            include=includes,
            n_jobs=2,
        )
        # The third axis shrinks from 2 to 1 when a single atom is included.
        self.assertTrue(der.shape == (3, 2, 1, 3, n_features))
        self.assertTrue(des.shape == (3, 2, n_features))
        assumed_der = np.empty((3, 2, 1, 3, n_features))
        assumed_des = np.empty((3, 2, n_features))
        desc._sparse = False
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(
            samples[0], include=includes[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(
            samples[1], include=includes[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(
            samples[2], include=includes[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # Excludes
        excludes = [[0], [0], [0]]
        der, des = desc.derivatives(
            system=samples,
            exclude=excludes,
            n_jobs=2,
        )
        # The third axis shrinks from 2 to 1 when a single atom is excluded.
        self.assertTrue(der.shape == (3, 2, 1, 3, n_features))
        self.assertTrue(des.shape == (3, 2, n_features))
        assumed_der = np.empty((3, 2, 1, 3, n_features))
        assumed_des = np.empty((3, 2, n_features))
        desc._sparse = False
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(
            samples[0], exclude=excludes[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(
            samples[1], exclude=excludes[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(
            samples[2], exclude=excludes[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # Test averaged output
        desc.average = "inner"
        positions = [[0], [0, 1], [1]]
        der, des = desc.derivatives(
            system=samples,
            positions=positions,
            n_jobs=2,
        )
        # The second axis has length 1 for every system here, even though
        # the systems were given a different number of positions.
        self.assertTrue(der.shape == (3, 1, 2, 3, n_features))
        self.assertTrue(des.shape == (3, 1, n_features))
        desc._sparse = False
        assumed_der = np.empty((3, 1, 2, 3, n_features))
        assumed_des = np.empty((3, 1, n_features))
        assumed_der[0, :], assumed_des[0, :] = desc.derivatives(
            samples[0], positions=positions[0], n_jobs=1)
        assumed_der[1, :], assumed_des[1, :] = desc.derivatives(
            samples[1], positions=positions[1], n_jobs=1)
        assumed_der[2, :], assumed_des[2, :] = desc.derivatives(
            samples[2], positions=positions[2], n_jobs=1)
        desc._sparse = sparse
        if sparse:
            der = der.todense()
            des = des.todense()
        self.assertTrue(np.allclose(assumed_der, der))
        self.assertTrue(np.allclose(assumed_des, des))

        # Variable size list output, as the systems have a different size
        desc.average = "off"
        samples = [molecule("CO"), molecule("NO2"), molecule("OH")]
        der, des = desc.derivatives(
            system=samples,
            n_jobs=2,
        )
        self.assertTrue(isinstance(der, list))
        self.assertTrue(der[0].shape == (2, 2, 3, n_features))
        self.assertTrue(der[1].shape == (3, 3, 3, n_features))
        self.assertTrue(der[2].shape == (2, 2, 3, n_features))
        desc._sparse = False
        # The entries differ in shape, so each system is compared on its own.
        for i in range(len(samples)):
            assumed_der, assumed_des = desc.derivatives(samples[i], n_jobs=1)
            i_der = der[i]
            i_des = des[i]
            if sparse:
                i_der = i_der.todense()
                i_des = i_des.todense()
            self.assertTrue(np.allclose(assumed_der, i_der))
            self.assertTrue(np.allclose(assumed_des, i_des))
        desc._sparse = sparse
temp = df['temp'] load = df['load'] print('Got training targets from spread sheet') species = set() for i in range(len(atoms)): species.update(atoms[i].get_chemical_symbols()) soap = SOAP(species=species, periodic=True, rcut=5, nmax=1, lmax=1, average="outer") print('Training hardness model...') soap.get_number_of_features() feature_vectors = soap.create(atoms, n_jobs=1) feature_tensor = th.tensor(feature_vectors) #print('DONE, SOAP descriptors ready to use') feature_pd = pd.DataFrame(feature_vectors) #feature pd is the soap descriptors #generate compositional descriptors df = pd.read_excel( '/Users/ziyanzhang/Downloads/dscribe/examples/hv_temp_cif_labels.xlsx') class Vectorize_Formula: def __init__(self): elem_dict = pd.read_excel( '/Users/ziyanzhang/Desktop/subgroup/elementsnew.xlsx'