def test_average_outer(self):
    """Checks the "outer" averaging mode, i.e. averaging that is done after
    the power spectrum has been calculated.

    For each radial basis the averaged output over the first two centers is
    compared against the arithmetic mean of the two non-averaged outputs.
    """
    system, centers, args = get_soap_default_setup()

    for rbf in ["gto", "polynomial"]:
        # Averaged output over the first two centers
        averaged = SOAP(**args, rbf=rbf, average="outer").create(
            system, centers[0:2]
        )

        # Non-averaged output, one center at a time
        plain = SOAP(**args, rbf=rbf, average="off")
        out_a = plain.create(system, [centers[0]])[0, :]
        out_b = plain.create(system, [centers[1]])[0, :]

        # The outer average must equal the mean of the individual outputs
        expected = (out_a + out_b) / 2
        self.assertTrue(np.allclose(averaged, expected))
def test_soap_structure(self):
    """Tests that when no positions are given, the SOAP for the full
    structure is calculated.

    ``create`` is called on ``H2O`` without a ``positions`` argument and the
    output is expected to have three rows.
    """
    lmax = 5
    nmax = 5
    desc = SOAP(atomic_numbers=[1, 8], rcut=5, nmax=nmax, lmax=lmax, periodic=True)

    vec = desc.create(H2O)

    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(vec.shape[0], 3)
def __init__(self, molecule_map, r_cut, n_max, l_max, n_jobs=1):
    """Store the SOAP hyperparameters and build the DScribe descriptor.

    Args:
        molecule_map: Forwarded unchanged to the parent initializer.
        r_cut: Stored on the instance and passed to SOAP as ``rcut``.
        n_max: Stored on the instance and passed to SOAP as ``nmax``.
        l_max: Stored on the instance and passed to SOAP as ``lmax``.
        n_jobs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(molecule_map, n_jobs)
    self.r_cut, self.n_max, self.l_max = r_cut, n_max, l_max

    # NOTE(review): self.species is not assigned in this method; presumably
    # the parent initializer provides it -- confirm against the base class.
    soap_settings = dict(
        species=self.species,
        periodic=False,
        rcut=r_cut,
        nmax=n_max,
        lmax=l_max,
    )
    self.dscribe_func = SOAP(**soap_settings)
def test_periodic_images(self):
    """Tests the periodic images seen by the descriptor.

    The output at the origin is computed for a non-periodic molecule, for a
    cubic cell and for a triclinic cell. For both periodic cells the output
    of the single cell is compared with the output of a (2, 1, 1) supercell.
    """
    origin = [[0, 0, 0]]

    def features_at_origin(descriptor, atoms):
        # Dense copy of the descriptor output for a single center at the origin
        return descriptor.create(atoms, positions=origin).toarray()

    desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=False, crossover=True)
    molecule = H2O.copy()

    # Non-periodic reference: all cell vectors are zero
    molecule.set_cell([
        [0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0],
    ])
    nocell = features_at_origin(desc, molecule)

    # Switch to a periodic descriptor and a periodic system
    desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=True, crossover=True)
    molecule.set_pbc(True)

    # Cubic cell and its supercell
    molecule.set_cell([
        [3.0, 0.0, 0.0],
        [0.0, 3.0, 0.0],
        [0.0, 0.0, 3.0],
    ])
    cubic_cell = features_at_origin(desc, molecule)
    cubic_suce = features_at_origin(desc, molecule * (2, 1, 1))

    # Triclinic cell and its supercell
    molecule.set_cell([
        [0.0, 2.0, 2.0],
        [2.0, 0.0, 2.0],
        [2.0, 2.0, 0.0],
    ])
    triclinic_cell = features_at_origin(desc, molecule)
    triclinic_suce = features_at_origin(desc, molecule * (2, 1, 1))

    # The periodic output must differ from the non-periodic one ...
    self.assertTrue(np.sum(np.abs((nocell[:3] - cubic_suce[:3]))) > 0.1)
    # ... while a cell and its supercell must agree.
    # NOTE(review): these two checks sum signed differences, so deviations
    # of opposite sign could cancel -- confirm this is intended.
    self.assertAlmostEqual(np.sum(cubic_cell[:3] - cubic_suce[:3]), 0)
    self.assertAlmostEqual(np.sum(triclinic_cell[:3] - triclinic_suce[:3]), 0)
def test_crossover(self):
    """Tests that disabling/enabling crossover works as expected.

    For each radial basis the output is computed with crossover enabled and
    again after setting ``desc.crossover = False`` on the same object. The
    first ``n_elem_feat`` columns must match, and the columns from
    ``2 * n_elem_feat`` onward in the full output must match the columns
    from ``n_elem_feat`` onward in the partial output.
    """
    pos = [[0.1, 0.1, 0.1]]

    for rbf in ["gto", "polynomial"]:
        desc = SOAP(
            species=[1, 8],
            rbf=rbf,
            crossover=True,
            rcut=3,
            nmax=5,
            lmax=5,
            periodic=False,
        )
        n_elem_feat = desc.get_number_of_element_features()
        with_cross = desc.create(H2O, positions=pos)

        # Disable crossover on the same descriptor and recompute
        desc.crossover = False
        without_cross = desc.create(H2O, positions=pos)

        first_block_equal = np.array_equal(
            with_cross[:, 0:n_elem_feat], without_cross[:, 0:n_elem_feat]
        )
        self.assertTrue(first_block_equal)

        remaining_equal = np.array_equal(
            with_cross[:, 2 * n_elem_feat:], without_cross[:, n_elem_feat:]
        )
        self.assertTrue(remaining_equal)
def test_system_input(self):
    """Tests that create takes internal system object.

    The test passes when ``create`` accepts the object returned by
    ``System.from_atoms(H2O)`` without raising; the output is not inspected.
    """
    internal_system = System.from_atoms(H2O)
    descriptor = SOAP(species=[1, 8], rcut=3, nmax=5, lmax=5, periodic=True)
    descriptor.create(internal_system)
def __init__(self, desc_spec):
    """Make a DScribe SOAP object from a descriptor specification.

    Args:
        desc_spec: Mapping describing the descriptor. It must contain
            ``type`` equal to ``"SOAP"`` and the required keys ``species``,
            ``cutoff``, ``atom_gaussian_width``, ``n`` and ``l``. Optional
            keys are ``rbf`` (default ``'gto'``), ``crossover`` (default
            ``False``) and ``periodic`` (default ``True``).

    Raises:
        ValueError: If ``type`` is missing or is not ``"SOAP"``, or if one
            of the required keys is absent.
    """
    from dscribe.descriptors import SOAP

    if desc_spec.get("type") != "SOAP":
        raise ValueError(
            "Type is not SOAP or cannot find the type of the descriptor")

    # required
    try:
        self.species = desc_spec['species']
        self.cutoff = desc_spec['cutoff']
        self.g = desc_spec['atom_gaussian_width']
        self.n = desc_spec['n']
        self.l = desc_spec['l']
    except KeyError as err:
        # Only a missing key is translated; a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide the original cause.
        raise ValueError(
            "Not enough information to intialize the `Atomic_Descriptor_SOAP` object"
        ) from err

    # we have defaults here
    self.rbf = desc_spec.get('rbf', 'gto')
    self.crossover = bool(desc_spec.get('crossover', False))
    self.periodic = bool(desc_spec.get('periodic', True))

    self.soap = SOAP(species=self.species,
                     rcut=self.cutoff,
                     nmax=self.n,
                     lmax=self.l,
                     sigma=self.g,
                     rbf=self.rbf,
                     crossover=self.crossover,
                     average='off',
                     periodic=self.periodic)

    print("Using SOAP Descriptors ...")

    # make an acronym
    self.acronym = "SOAP-n{}-l{}-c{}-g{}".format(
        self.n, self.l, self.cutoff, self.g)
def test_soap(version):
    """Tests how the SOAP descriptor calculation scales with system size.

    For each radial basis ("gto" and "polynomial") the wall-clock time of
    ``create`` is measured on NiTi cells repeated 5 to 19 times, and time is
    plotted against the number of atoms. The figure is saved as
    ``soap_scaling_{version}.pdf``.

    Args:
        version: Label inserted into the plot title and the output filename.
    """
    nmax = 4
    lmax = 4
    a = 2.993  # edge length of the two-atom NiTi cell

    fig = mpl.figure(figsize=[9, 7])
    ax = fig.add_subplot(111)
    ax.set_title("SOAP nmax={}, lmax={}, version={}".format(
        nmax, lmax, version))
    ax.set_xlabel("Number of atoms")
    ax.set_ylabel("Time (s)")

    for rbf in ["gto", "polynomial"]:
        # The descriptor settings do not depend on the system size, so the
        # generator is built once per basis instead of once per system.
        soap_generator = SOAP(rcut=3.0,
                              nmax=nmax,
                              lmax=lmax,
                              species=["Ni", "Ti"],
                              rbf=rbf,
                              crossover=True,
                              periodic=True)

        N = []
        t = []
        # Loop over different system sizes
        for ncells in tqdm(range(5, 20)):
            natoms = 2 * ncells**3

            # Build the two-atom cell and replicate it
            niti = Atoms(
                "NiTi",
                positions=[[0.0, 0.0, 0.0], [a / 2, a / 2, a / 2]],
                cell=[a, a, a],
                pbc=[1, 1, 1],
            ) * ncells

            # Only the descriptor calculation itself is timed
            t0 = time()
            soap_generator.create(niti)
            t1 = time()

            N.append(natoms)
            t.append(t1 - t0)

        ax.plot(np.array(N), np.array(t), "o--", label=rbf)

    mpl.legend()
    mpl.savefig("soap_scaling_{}.pdf".format(version))
def create_gto(system):
    """Return the SOAP output for ``system`` using the "gto" radial basis.

    The descriptor is configured from the atomic numbers reported by
    ``system.get_atomic_numbers()`` and is non-periodic with crossover
    enabled (rcut=8.0, nmax=5, lmax=5).
    """
    settings = {
        "atomic_numbers": system.get_atomic_numbers(),
        "rcut": 8.0,
        "lmax": 5,
        "nmax": 5,
        "rbf": "gto",
        "periodic": False,
        "crossover": True,
    }
    return SOAP(**settings).create(system)
def create_poly(system):
    """Return the SOAP output for ``system`` using the "polynomial" radial
    basis.

    The descriptor is configured from the atomic numbers reported by
    ``system.get_atomic_numbers()`` and is non-periodic with crossover
    enabled (rcut=8.0, nmax=1, lmax=2).
    """
    numbers = system.get_atomic_numbers()
    poly_soap = SOAP(
        atomic_numbers=numbers,
        rbf="polynomial",
        rcut=8.0,
        nmax=1,
        lmax=2,
        crossover=True,
        periodic=False,
    )
    features = poly_soap.create(system)
    return features
def test_sparse(self):
    """Tests the sparse matrix creation.

    With ``sparse=False`` the output must be a numpy array, with
    ``sparse=True`` a ``scipy.sparse.coo_matrix``.
    """
    # Dense
    desc = SOAP(species=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=False)
    vec = desc.create(H2O)
    # assertIsInstance is the idiomatic type check and prints the actual
    # type when it fails
    self.assertIsInstance(vec, np.ndarray)

    # Sparse
    desc = SOAP(species=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=True)
    vec = desc.create(H2O)
    self.assertIsInstance(vec, scipy.sparse.coo_matrix)
def test_weighting(self):
    """Tests that the weighting done with C corresponds to the
    easier-to-code but less performant python version.

    For every radial basis and weighting function the analytical power
    spectrum from SOAP is compared with a power spectrum built from
    coefficients that are loaded from a ``.npy`` file on disk.
    """
    system, centers, args = get_soap_default_setup()
    filename_template = (
        "{rbf}_coefficients_{nmax}_{lmax}_{rcut}_{sigma}_{func}.npy"
    )

    for rbf in ["gto", "polynomial"]:
        weightings = [
            {"function": "poly", "r0": 2, "c": 3, "m": 4},
            {"function": "pow", "r0": 2, "c": 3, "d": 4, "m": 5},
            {"function": "exp", "r0": 2, "c": 3, "d": 4},
        ]
        for weighting in weightings:
            # Analytical power spectrum from the descriptor
            descriptor = SOAP(**args, rbf=rbf, weighting=weighting)
            analytical = descriptor.create(system, positions=centers)

            # The numerical coefficients are not regenerated here; they are
            # loaded from a file named after the setup and the weighting
            # function.
            filename = filename_template.format(
                **args, rbf=rbf, func=weighting["function"]
            )
            coeffs = np.load(filename)
            numerical = self.get_power_spectrum(
                coeffs, crossover=args["crossover"]
            )

            spectra_match = np.allclose(
                numerical,
                analytical,
                atol=1e-15,
                rtol=0.01,
            )
            self.assertTrue(spectra_match)
def test_rbf_orthonormality(self):
    """Tests that the gto radial basis functions are orthonormal.

    The radial functions are rebuilt on a grid from the descriptor's
    ``_alphas`` and ``_betas`` and, for every l, the overlap matrix
    integral(r^2 * g_i(r) * g_j(r)) is compared with the identity matrix.
    """
    sigma = 0.15
    rcut = 2.0
    nmax = 2
    lmax = 20
    soap = SOAP(
        species=[1],
        lmax=lmax,
        nmax=nmax,
        sigma=sigma,
        rcut=rcut,
        crossover=True,
        sparse=False,
    )
    alphas = np.reshape(soap._alphas, [lmax + 1, nmax])
    betas = np.reshape(soap._betas, [lmax + 1, nmax, nmax])

    # Radial grid: built once, it does not depend on n, l or k
    nr = 10000
    rspace = np.linspace(0, rcut + 5, nr)

    # Form the radial basis functions
    functions = np.zeros((nmax, lmax + 1, nr))
    for n in range(nmax):
        for l in range(lmax + 1):
            gto = np.zeros(nr)
            for k in range(nmax):
                gto += (
                    betas[l, n, k]
                    * rspace ** l
                    * np.exp(-alphas[l, k] * rspace ** 2)
                )
            functions[n, l, :] = gto

    # Calculate the overlap integrals
    identity = np.eye(nmax)
    for l in range(lmax + 1):
        S = np.zeros((nmax, nmax))
        for i in range(nmax):
            for j in range(nmax):
                # Integrate over the actual grid points: the spacing of
                # linspace(0, rcut + 5, nr) is (rcut + 5) / (nr - 1), which
                # a hand-written dx of (rcut + 5) / nr would not match.
                S[i, j] = np.trapz(
                    rspace ** 2 * functions[i, l, :] * functions[j, l, :],
                    x=rspace,
                )

        # Check that the basis functions for each l are orthonormal
        self.assertTrue(np.allclose(S - identity, 0.0, atol=1e-3))
def test_positions(self):
    """Tests that different positions are handled correctly.

    For each periodic/crossover combination the output shape is checked for
    cartesian positions (array and list), atomic indices (list and array)
    and for no positions at all. Finally a non-numeric position must raise
    a ValueError.
    """
    # (periodic, crossover); the descriptor of the last pair is reused for
    # the invalid-input check below.
    configurations = [
        (False, True),
        (True, True),
        (True, False),
        (False, False),
    ]

    for periodic, crossover in configurations:
        desc = SOAP(
            species=[1, 6, 8],
            rcut=10.0,
            nmax=2,
            lmax=0,
            periodic=periodic,
            crossover=crossover,
        )
        n_feat = desc.get_number_of_features()

        # One cartesian position gives one row
        self.assertEqual(
            (1, n_feat),
            desc.create(H2O, positions=np.array([[0, 0, 0]])).shape,
        )
        self.assertEqual(
            (1, n_feat),
            desc.create(H2O, positions=[[0, 0, 0]]).shape,
        )

        # Three atomic indices give three rows
        self.assertEqual(
            (3, n_feat),
            desc.create(H2O, positions=[0, 1, 2]).shape,
        )
        self.assertEqual(
            (3, n_feat),
            desc.create(H2O, positions=np.array([0, 1, 2])).shape,
        )

        # No positions: three rows as well
        self.assertEqual((3, n_feat), desc.create(H2O).shape)

    with self.assertRaises(ValueError):
        desc.create(H2O, positions=['a'])
def test_dtype(self):
    """Tests that the specified data type is respected.

    For float32 and float64, with dense and sparse output, the dtype of the
    descriptor from ``create`` and of both values returned by
    ``derivatives`` must equal the requested one.
    """
    cases = [
        ("float32", np.float32, False),
        ("float32", np.float32, True),
        ("float64", np.float64, False),
        ("float64", np.float64, True),
    ]

    for dtype_name, expected, sparse in cases:
        # Identifies the failing combination in the assertion message
        case = "dtype={}, sparse={}".format(dtype_name, sparse)

        soap = SOAP(
            species=[1, 8],
            rcut=3,
            nmax=1,
            lmax=1,
            sparse=sparse,
            dtype=dtype_name,
        )
        desc1 = soap.create(H2O)
        der, desc2 = soap.derivatives(H2O)

        # assertEqual shows the actual dtype on failure
        self.assertEqual(desc1.dtype, expected, msg=case)
        self.assertEqual(desc2.dtype, expected, msg=case)
        self.assertEqual(der.dtype, expected, msg=case)
def main(args):
    """
    Generates SOAP descriptors for the atoms saved in args.xyz

    :param args: object providing ``xyz`` (input passed to ``read_xyz``),
        ``rcut`` and ``sigma`` (SOAP settings) and ``tgt`` (target passed to
        ``np.save``)
    :return: None; the normalized descriptors are written to ``args.tgt``
    """
    # Only the structures and the species are needed here
    mols, _num_list, _atom_list, species = read_xyz(args.xyz)

    descriptor = SOAP(
        species=species,
        periodic=False,
        rcut=args.rcut,
        nmax=8,
        lmax=6,
        sigma=args.sigma,
        sparse=True,
    )
    features = normalize(descriptor.create(mols), copy=False)

    np.save(args.tgt, [features])
def get_soap(file, rcut=7, nmax=5, lmax=8):
    """Return the averaged periodic SOAP output for a Cd/Te structure file.

    Args:
        file: File name, read relative to the current directory with
            ``vasp.read_vasp``.
        rcut: Passed to SOAP as ``rcut``.
        nmax: Passed to SOAP as ``nmax``.
        lmax: Passed to SOAP as ``lmax``.

    Returns:
        The output of ``SOAP.create`` for the structure that was read.
    """
    path = './' + file
    print(path)
    structure = vasp.read_vasp(path)

    descriptor = SOAP(
        periodic=True,
        species=['Cd', 'Te'],
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
        rbf='gto',
        sigma=0.125,
        average=True,
    )
    return descriptor.create(structure)
def fit(self, x, y=None):
    """Create the structure adaptor and the SOAP descriptor.

    ``x`` and ``y`` are accepted but not used by this method. The SOAP
    settings are read from the attributes of the same name on ``self``.

    Returns:
        ``self``.
    """
    self.adaptor = AseAtomsAdaptor()

    setting_names = (
        "species",
        "periodic",
        "rcut",
        "nmax",
        "lmax",
        "rbf",
        "sigma",
        "average",
    )
    soap_settings = {name: getattr(self, name) for name in setting_names}
    self.soap = SOAP(**soap_settings)

    return self
def main(system, cutoff, average, overwrite=True):
    """Compute and store SOAP vectors for the POPC trajectories of ``system``.

    Trajectory files are globbed below ``HOME`` and only paths containing
    ``'303'`` are processed. For every trajectory the cell of each frame is
    taken from the matching ``.box`` file, and the SOAP output is created at
    the atoms with atomic number 15 (phosphorus) of the first frame. The
    result is written with ``np.savez_compressed``.

    Args:
        system: Name inserted into the input/output paths and used as key
            into the module-level ``species`` mapping.
        cutoff: Passed to SOAP as ``rcut`` and used in the output folder name.
        average: Passed to SOAP as ``average`` and to ``get_folder``.
        overwrite: When False, trajectories whose output file already exists
            are skipped.
    """
    files = sorted(
        glob.glob('{}/Lipids/trajectories_{}{}/POPC_*xyz'.format(
            HOME, system, TR)))
    print('Processing only 303k for now')
    files = [i for i in files if '303' in i]

    folder = '{}/Lipids/dscribe_{}{}/{}/{}_ang/'.format(
        HOME, system, TR, get_folder(average), cutoff)
    if not os.path.isdir(folder):
        os.mkdir(folder)

    for f in files:
        save_name = folder + f[:-4].split('/')[-1]
        # np.savez_compressed appends '.npz' to the name, so that is the
        # file whose existence decides whether the work was already done.
        if not os.path.isfile(save_name + '.npz') or overwrite:
            soap_input = dict(
                average=average,
                periodic=True,
                species=species[system],
                rcut=cutoff,
                nmax=8,
                lmax=8,
            )
            traj = read_traj(f)
            box = np.loadtxt(f[:-4] + '.box')
            for i, frame in enumerate(traj):
                frame.set_cell(list(box[i]))
                frame.set_pbc([1, 1, 0])

            tt = time.time()
            soap = SOAP(**soap_input)
            N = len(traj)
            # Indices of the atoms with atomic number 15 in the first frame;
            # the same index list is reused for every frame.
            pos = [list(np.where(traj[0].get_atomic_numbers() == 15)[0])]
            soap_vec = soap.create(traj, positions=pos * N)
            np.savez_compressed(save_name, soap_vec)
            print('saved {}'.format(save_name), time.time() - tt)
        else:
            print('skip {}'.format(save_name))
def test_get_location_wo_crossover(self):
    """Tests that ``get_location`` works as expected when crossover is
    disabled.

    Same-species pairs must return a location, undefined elements and
    cross-species pairs must raise a ValueError, and the returned locations
    must select empty or filled parts of the CO2 and H2O outputs.
    """
    # Without crossover
    species = ["H", "O", "C"]
    desc = SOAP(
        species=species,
        rbf="gto",
        crossover=False,
        rcut=3,
        nmax=5,
        lmax=5,
        periodic=False,
    )

    # Symbols
    loc_hh = desc.get_location(("H", "H"))
    loc_oo = desc.get_location(("O", "O"))
    loc_cc = desc.get_location(("C", "C"))

    # Undefined elements
    with self.assertRaises(ValueError):
        desc.get_location((2, 1))
    with self.assertRaises(ValueError):
        desc.get_location(("He", "H"))

    # Check that cross-species pairs are not supported. Each pair needs its
    # own assertRaises context: inside a shared context the first exception
    # ends the block and the remaining calls would never run.
    for pair in [("H", "O"), ("H", "C"), ("C", "H")]:
        with self.assertRaises(ValueError):
            desc.get_location(pair)

    # Check that slices in the output are correctly empty or filled
    co2 = molecule("CO2")
    h2o = molecule("H2O")
    co2_out = desc.create(co2)
    h2o_out = desc.create(h2o)

    # H-H
    self.assertEqual(co2_out[:, loc_hh].sum(), 0)
    self.assertNotEqual(h2o_out[:, loc_hh].sum(), 0)

    # C-C
    self.assertNotEqual(co2_out[:, loc_cc].sum(), 0)
    self.assertEqual(h2o_out[:, loc_cc].sum(), 0)

    # O-O
    self.assertNotEqual(co2_out[:, loc_oo].sum(), 0)
    self.assertNotEqual(h2o_out[:, loc_oo].sum(), 0)
def SOAP_Definition(species=None):
    """Return a non-periodic SOAP descriptor with rcut=6.0, nmax=8, lmax=6.

    Args:
        species: Species handed to SOAP. When empty or None, the default
            list H, C, N, O, F, S is used.
    """
    # Fall back to the default element set for any falsy value
    species = species or ["H", "C", "N", "O", "F", "S"]

    return SOAP(
        species=species,
        periodic=False,
        rcut=6.0,
        nmax=8,
        lmax=6,
    )
def test_sparse(self):
    """Tests that sparse features may also be used to construct the kernels.

    The test passes when ``REMatchKernel.create`` accepts the sparse SOAP
    output without raising; the kernel values are not inspected.
    """
    # Sparse SOAP features for a water molecule
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=True,
    )
    water = molecule('H2O')
    water_features = soap.create(water)

    rematch = REMatchKernel(metric="linear", alpha=0.1, threshold=1e-6)
    rematch.create([water_features])
def test_number_of_features(self):
    """Tests that the reported number of features is correct.

    The reported count is compared against a closed-form expression and
    against the number of columns in the actual output.
    """
    lmax = nmax = 5
    n_elems = 2
    desc = SOAP(species=[1, 8], rcut=3, nmax=nmax, lmax=lmax, periodic=True)

    # Reported count versus the expected closed-form value
    n_basis = nmax * n_elems
    expected = int((lmax + 1) * n_basis * (n_basis + 1) / 2)
    self.assertEqual(desc.get_number_of_features(), expected)

    # Reported count versus the width of the created output
    reported = desc.get_number_of_features()
    output = desc.create(H2O)
    self.assertEqual(reported, output.shape[1])
def test_sparse(self):
    """Tests that sparse features may also be used to construct the kernels.

    The test passes when ``AverageKernel.create`` accepts the sparse SOAP
    output without raising; the kernel values are not inspected.
    """
    # Sparse SOAP features for a water molecule
    soap = SOAP(
        [1, 8], 5.0, 2, 2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=True,
    )
    water = molecule('H2O')
    water_features = soap.create(water)

    average_kernel = AverageKernel(metric="linear")
    average_kernel.create([water_features])
def __init__(self, rcut: float = 6, nmax: int = 8, lmax: int = 6, species=frozenset({'C', 'O', 'H', 'N', 'F'})):
    """Initialize the converter

    Args:
        rcut (float): Cutoff radius
        nmax (int): Forwarded to the SOAP descriptor as ``nmax``
        lmax (int): Forwarded to the SOAP descriptor as ``lmax``
        species (Iterable): Elements to include in the potential; they are
            sorted before being handed to the SOAP descriptor
    """
    super().__init__()

    # Sorting gives a deterministic element order for set-like inputs
    ordered_species = sorted(species)
    self.soap = SOAP(
        species=ordered_species,
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
    )
def createDescriptorsSingleSOAP(data, species, sigma_SOAP, cutoff_SOAP, nmax_SOAP, lmax_SOAP, periodic, sparse_SOAP=default_sparse_SOAP):
    """Build a SOAP descriptor from the given settings and apply it to ``data``.

    Args:
        data: Input handed to ``SOAP.create``.
        species: Passed to SOAP as ``species``.
        sigma_SOAP: Passed to SOAP as ``sigma``.
        cutoff_SOAP: Passed to SOAP as ``rcut``.
        nmax_SOAP: Passed to SOAP as ``nmax``.
        lmax_SOAP: Passed to SOAP as ``lmax``.
        periodic: Passed to SOAP as ``periodic``.
        sparse_SOAP: Passed to SOAP as ``sparse``.

    Returns:
        The output of ``SOAP.create(data)``.
    """
    soap_settings = {
        "species": species,
        "sigma": sigma_SOAP,
        "periodic": periodic,
        "rcut": cutoff_SOAP,
        "nmax": nmax_SOAP,
        "lmax": lmax_SOAP,
        "sparse": sparse_SOAP,
    }
    return SOAP(**soap_settings).create(data)
def test_invalid_system(self):
    """Tests that an invalid input type throws the appropriate error.

    A string, a list of ints, a list of mixed containers and the descriptor
    object itself must each make ``create`` raise a ValueError.
    """
    desc = SOAP(species=[1, 8], rcut=3, nmax=5, lmax=5, periodic=True)

    invalid_inputs = [
        "invalid input",
        [1, 2, 3],
        [{"foo": 1, "bar": 2}, {5, 4}, [1, 2, 3]],
        desc,
    ]
    for invalid in invalid_inputs:
        with self.assertRaises(ValueError):
            desc.create(invalid)
def compute_desc(molecs, dataset='zundel_100k', soap_params=None, parallelize=True):
    """Compute SOAP descriptors for every atom of every structure.

    Args:
        molecs: Structures handed to ``SOAP.create``; the length of its first
            axis is taken as the number of structures.
        dataset: Key into the module-level ``datasets`` mapping, whose entry
            provides the ``'soap'`` settings and the ``'atoms'`` count.
        soap_params: Optional mapping of SOAP settings that override the
            dataset's defaults. The dataset entry itself is not modified
            because a deep copy is updated.
        parallelize: When True, ``create`` runs with one job per CPU reported
            by ``multiprocessing.cpu_count()``; otherwise with a single job.

    Returns:
        The descriptors reshaped to
        ``(number of structures, params['atoms'], number of features)``.
    """
    params = copy.deepcopy(datasets[dataset])
    # Identity comparison is the correct test for None
    if soap_params is not None:
        params['soap'].update(soap_params)

    tot_time = np.shape(molecs)[0]
    n_atoms = params['atoms']
    n_jobs = multiprocessing.cpu_count() if parallelize else 1

    soap = SOAP(**params['soap'])
    descriptors = soap.create(
        molecs,
        # The same atom indices are used as centers in every structure
        positions=[np.arange(n_atoms) for _ in range(tot_time)],
        n_jobs=n_jobs)

    return np.reshape(descriptors,
                      (tot_time, n_atoms, np.shape(descriptors)[1]))
def test_return_descriptor(self):
    """Checks the ``return_descriptor`` switch of ``derivatives``.

    For both the analytical and the numerical method, the call is made once
    with ``return_descriptor=False`` and once with ``return_descriptor=True``;
    in the latter case the result is unpacked into two values.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=3,
        nmax=2,
        lmax=0,
        rbf="gto",
        sparse=False,
        periodic=False,
    )

    for method in ("analytical", "numerical"):
        # Derivatives only
        s = soap.derivatives(H2O, method=method, return_descriptor=False)
        # Derivatives and descriptor; the unpacking fails unless exactly
        # two values are returned
        D, d = soap.derivatives(H2O, method=method, return_descriptor=True)
def getSOAPs(geometries,
             species,
             rcut=5,
             nmax=10,
             lmax=9,
             sigma=0.1,
             periodic=True,
             crossover=True,
             sparse=False):
    """
    Takes a Series of geometries with one He present,
        returns SOAP representation of the chemical environment of He for each item
    Assumes any given structure in ``geometries`` has the same collection of elements
        as all the other structures
    Assumes any given structure in ``geometries`` has the same number of atoms as all
        the other structures
    Assumes the He atom is the last atom of every structure

    Input:
        geometries: Series of Atoms objects; each must contain exactly 1 He atom
        species: species handed to the SOAP descriptor
        rcut, nmax, lmax, sigma, periodic, crossover, sparse: SOAP parameters

    Output:
        output: Series of normalized SOAP vectors named 'SOAP', indexed like
            ``geometries``
    """
    ## set up descriptor
    desc = SOAP(species=species,
                rcut=rcut,
                nmax=nmax,
                lmax=lmax,
                sigma=sigma,
                periodic=periodic,
                crossover=crossover,
                sparse=sparse)

    ## apply descriptor
    soaps = {}
    # Series.items() replaces iteritems(), which was removed in pandas 2.0
    for i, geom in geometries.items():
        HeLoc = len(geom) - 1  # assume He atom is last one in Atoms list
        tempSOAP = preprocessing.normalize(
            desc.create(geom, positions=[HeLoc],
                        n_jobs=4))  # SOAP representation of temp
        # A single center was requested, so row 0 is the He environment
        soaps[i] = tempSOAP[0]

    return pd.Series(soaps, name='SOAP')