def test_xy(self):
    """Checks that the average kernel can be evaluated between two different
    sets of structures, which is what kernel-based prediction requires.
    """
    # A single descriptor is shared so that all feature vectors are comparable
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    structures = [molecule(formula) for formula in ("H2O", "O2", "H2O2")]
    water_feat, oxygen_feat, peroxide_feat = [
        soap.create(structure) for structure in structures
    ]

    # Linear dot-product kernel between a two-structure and a one-structure set
    similarity = AverageKernel(metric="linear").create(
        [water_feat, oxygen_feat], [peroxide_feat]
    )
    self.assertEqual(similarity.shape, (2, 1))
def coefficients_gto(system, centers, args):
    """Used to numerically calculate the inner product coefficients of SOAP
    with GTO radial basis.

    Args:
        system: Structure forwarded to ``SOAP.create`` and
            ``soap_integration``.
        centers: Positions at which the descriptor is evaluated.
        args (dict): Keyword arguments for the ``SOAP`` constructor. Only
            ``"nmax"`` and ``"lmax"`` are read directly here; the full dict is
            forwarded unchanged to ``SOAP`` and ``soap_integration``.

    Returns:
        The result of ``soap_integration`` evaluated with the GTO radial
        basis function defined below.
    """
    nmax = args["nmax"]
    lmax = args["lmax"]

    # Calculate the weights and decays of the radial basis functions.
    soap = SOAP(**args)
    soap.create(system, positions=centers)
    alphas = np.reshape(soap._alphas, [lmax + 1, nmax])
    betas = np.reshape(soap._betas, [lmax + 1, nmax, nmax])

    def rbf_gto(r, n, l):
        # Weighted sum over the nmax primitives r**l * exp(-alpha * r**2) for
        # angular index l; betas[l, n] holds the weights of radial function n.
        return (betas[l, n] * r ** l * np.exp(-alphas[l] * r ** 2)).sum()

    return soap_integration(system, centers, args, rbf_gto)
def soap_gto_vs_polynomial(version):
    """GTO vs polynomial RBF scaling.

    Times ``SOAP.create`` on increasingly large replicas of the module-level
    ``system_periodic`` for both radial bases, plots time against number of
    atoms and shows the figure.

    Args:
        version: Label that is only used in the plot title.
    """
    nmax = 4
    lmax = 4
    title = "SOAP nmax={}, lmax={}, version={}".format(nmax, lmax, version)

    fig = mpl.figure(figsize=[9, 7])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel("Number of atoms")
    ax.set_ylabel("Time (s)")

    for rbf in ["gto", "polynomial"]:
        sizes = []
        durations = []
        for ncells in tqdm(range(5, 15)):
            descriptor = SOAP(
                rcut=3.0,
                nmax=nmax,
                lmax=lmax,
                species=["Ni", "Ti"],
                rbf=rbf,
                crossover=True,
                periodic=True,
            )
            supercell = system_periodic.copy() * ncells

            # Only the descriptor evaluation itself is timed
            started = time()
            descriptor.create(supercell)
            finished = time()

            sizes.append(len(supercell))
            durations.append(finished - started)

        ax.plot(sizes, durations, "o--", label="{}".format(rbf))

    mpl.legend()
    mpl.show()
def test_periodic_images(self):
    """Tests the periodic images seen by the descriptor."""
    origin = [[0, 0, 0]]

    def make_descriptor(periodic):
        # Identical settings for both descriptors apart from periodicity
        return SOAP(
            species=[1, 6, 8],
            rcut=10.0,
            nmax=2,
            lmax=0,
            periodic=periodic,
            crossover=True,
        )

    # Non-periodic reference for comparison
    finite_desc = make_descriptor(periodic=False)
    water = H2O.copy()
    water.set_cell([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
    nocell = finite_desc.create(water, positions=origin)

    # Switch to a periodic system and a periodic descriptor
    periodic_desc = make_descriptor(periodic=True)
    water.set_pbc(True)

    # Cubic cell: unit cell versus a 2x1x1 supercell
    water.set_cell([[3.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, 0.0, 3.0]])
    cubic_cell = periodic_desc.create(water, positions=origin)
    cubic_suce = periodic_desc.create(water * (2, 1, 1), positions=origin)

    # Triclinic cell: unit cell versus a 2x1x1 supercell
    water.set_cell([[0.0, 2.0, 2.0], [2.0, 0.0, 2.0], [2.0, 2.0, 0.0]])
    triclinic_cell = periodic_desc.create(water, positions=origin)
    triclinic_suce = periodic_desc.create(water * (2, 1, 1), positions=origin)

    # The periodic output must differ from the isolated molecule, while a
    # supercell must give the same output as its unit cell.
    # NOTE(review): the last two checks sum *signed* differences, so
    # deviations of opposite sign could cancel; the first check uses np.abs.
    finite_vs_periodic = np.sum(np.abs(nocell[:3] - cubic_suce[:3]))
    self.assertTrue(finite_vs_periodic > 0.1)
    self.assertAlmostEqual(np.sum(cubic_cell[:3] - cubic_suce[:3]), 0)
    self.assertAlmostEqual(np.sum(triclinic_cell[:3] - triclinic_suce[:3]), 0)
def test_xy(self):
    """Checks that the REMatch kernel can be evaluated between two different
    sets of structures, which is what kernel-based prediction requires.
    """
    # A single descriptor is shared so that all feature vectors are comparable
    soap = SOAP(
        [1, 8],
        5.0,
        2,
        2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    structures = [molecule(formula) for formula in ('H2O', 'O2', 'H2O2')]
    water_feat, oxygen_feat, peroxide_feat = [
        soap.create(structure) for structure in structures
    ]

    # Linear dot-product kernel between a two-structure and a one-structure set
    rematch = REMatchKernel(metric="linear", alpha=0.1, threshold=1e-6)
    similarity = rematch.create([water_feat, oxygen_feat], [peroxide_feat])
    self.assertEqual(similarity.shape, (2, 1))
def test_difference(self):
    """Tests that the similarity is correct for identical, unrelated and
    partially related molecules.
    """
    soap = SOAP(
        species=[1, 6, 7, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    kernel = AverageKernel(metric="linear")

    def pair_kernel(first_formula, second_formula):
        # Kernel matrix for two molecules given by their formulas
        first = molecule(first_formula)
        second = molecule(second_formula)
        first_feat = soap.create(first)
        second_feat = soap.create(second)
        return kernel.create([first_feat, second_feat])

    # Identical molecules: every entry of the kernel matrix is one
    water_feat = soap.create(molecule("H2O"))
    same = kernel.create([water_feat, water_feat])
    self.assertTrue(np.all(np.abs(same - 1) < 1e-3))

    # Completely different molecules: the kernel matrix is the identity
    unrelated = pair_kernel("N2", "H2O")
    self.assertTrue(np.all(np.abs(unrelated - np.eye(2)) < 1e-3))

    # Somewhat similar molecules: high but off-diagonal similarity
    related = pair_kernel("H2O", "H2O2")
    self.assertTrue(related[0, 1] > 0.9)
def test_multiple_species(self):
    """Tests multiple species are handled correctly."""
    lmax = 5
    nmax = 5
    species = list(range(1, 17))
    desc = SOAP(
        species=species,
        rcut=5,
        nmax=nmax,
        lmax=lmax,
        periodic=False,
        sparse=False,
    )

    # Eight atoms placed along the cube diagonal
    diagonal = np.linspace(0, 8, 8)[:, np.newaxis]
    pos = np.hstack((diagonal, diagonal, diagonal))

    def describe(symbols):
        # Descriptor output for a chain made of the given eight species
        return desc.create(Atoms(symbols=symbols, positions=pos, pbc=False))

    low_half = describe(species[:8])
    high_half = describe(species[8:])
    middle = describe(species[4:12])

    no_overlap = np.dot(low_half[6, :], high_half[6, :])
    overlap_low = np.dot(low_half[3, :], middle[3, :])
    overlap_high = np.dot(high_half[3, :], middle[3, :])

    # The dot product for systems without overlap in species should be zero
    self.assertTrue(abs(no_overlap) <= 1e-8)

    # The systems with overlap in the elements should have overlap in the
    # dot product
    self.assertTrue(abs(overlap_low) > 1e-3)
    self.assertTrue(abs(overlap_high) > 1e-3)
def test_average(self):
    """Tests that the average output is created correctly."""
    system = Atoms(
        symbols=["H", "C"],
        positions=[[-1, 0, 0], [1, 0, 0]],
        cell=[2, 2, 2],
        pbc=True,
    )

    def make_descriptor(average):
        # Both descriptors share every setting except the averaging flag
        return SOAP(
            atomic_numbers=[1, 6, 8],
            rcut=5,
            nmax=3,
            lmax=5,
            periodic=False,
            crossover=True,
            average=average,
            sparse=False,
        )

    # Averaged output over the whole system
    average = make_descriptor(average=True).create(system)[0, :]

    # Individual output for each of the two atoms
    per_atom = make_descriptor(average=False)
    first = per_atom.create(system, positions=[0])[0, :]
    second = per_atom.create(system, positions=[1])[0, :]

    # Check that the averaging is done correctly.
    # NOTE(review): this is an exact floating-point comparison; np.allclose
    # may be the more robust choice if this ever turns flaky.
    assumed_average = (first + second) / 2
    self.assertTrue(np.array_equal(average, assumed_average))
def test_constructor(self):
    """Tests different valid and invalid constructor values.

    NOTE(review): several cases combine more than one suspicious value (for
    example ``species=[-1, 2]`` or ``sigma=0`` together with the parameter
    named in the comment), so a ValueError does not prove which argument was
    rejected.
    """
    rejected_by_constructor = [
        # Invalid gaussian width
        dict(species=[-1, 2], rcut=5, sigma=0, nmax=5, lmax=5, periodic=True),
        dict(species=[-1, 2], rcut=5, sigma=-1, nmax=5, lmax=5, periodic=True),
        # Invalid rcut
        dict(species=[-1, 2], rcut=0.5, sigma=0, nmax=5, lmax=5, periodic=True),
        # Invalid lmax
        dict(species=[-1, 2], rcut=0.5, sigma=0, nmax=5, lmax=10, rbf="gto",
             periodic=True),
        # Invalid nmax
        dict(species=["H", "O"], rcut=4, sigma=1, nmax=0, lmax=8, rbf="gto",
             periodic=True),
    ]
    for kwargs in rejected_by_constructor:
        with self.assertRaises(ValueError):
            SOAP(**kwargs)

    # For these the error may come from either the constructor or create()
    rejected_by_create = [
        # Too high radial basis set density: poly
        dict(species=["H", "O"], rcut=10, sigma=0.5, nmax=12, lmax=8,
             rbf="polynomial", periodic=False),
        # Too high radial basis set density: gto
        dict(species=["H", "O"], rcut=10, sigma=0.5, nmax=20, lmax=8,
             rbf="gto", periodic=False),
    ]
    for kwargs in rejected_by_create:
        with self.assertRaises(ValueError):
            descriptor = SOAP(**kwargs)
            descriptor.create(H2O)
def test_is_periodic(self):
    """Tests whether periodic images are seen by the descriptor"""
    origin = [[0, 0, 0]]
    water = H2O.copy()

    # Non-periodic evaluation; the result is not compared against anything,
    # the call only has to succeed.
    finite_desc = SOAP(
        species=[1, 6, 8],
        rcut=10.0,
        nmax=2,
        lmax=0,
        periodic=False,
        crossover=True,
    )
    water.set_pbc(False)
    finite_desc.create(water, positions=origin)

    # Periodic evaluation in a small cubic cell
    water.set_pbc(True)
    water.set_cell([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])
    periodic_desc = SOAP(
        species=[1, 6, 8],
        rcut=10.0,
        nmax=2,
        lmax=0,
        periodic=True,
        crossover=True,
    )
    cubic_cell = periodic_desc.create(water, positions=origin)

    self.assertTrue(np.sum(cubic_cell) > 0)
def test_basis(self):
    """Tests that the output vectors behave correctly as a basis."""

    def dimer(symbols, positions=None):
        # Two-atom periodic system; defaults to the geometry shared by most
        # of the systems below.
        if positions is None:
            positions = [[1, 0, 0], [0, 1, 0]]
        return Atoms(symbols=symbols, positions=positions, cell=[2, 2, 2], pbc=True)

    systems = [
        dimer(["H", "H"]),
        dimer(["O", "O"]),
        dimer(["C", "C"]),
        dimer(["H", "C"], positions=[[-1, 0, 0], [1, 0, 0]]),
        dimer(["H", "C"]),
        dimer(["H", "O"]),
        dimer(["C", "O"]),
    ]

    desc = SOAP(
        atomic_numbers=[1, 6, 8],
        rcut=5,
        nmax=3,
        lmax=5,
        periodic=False,
        crossover=True,
        sparse=False,
    )

    # One output vector per system, all evaluated at the origin
    vec1, vec2, vec3, vec4, vec5, vec6, vec7 = [
        desc.create(system, positions=[[0, 0, 0]])[0, :] for system in systems
    ]

    # The dot-product should be zero when there are no overlapping elements
    self.assertEqual(np.dot(vec1, vec2), 0)
    self.assertEqual(np.dot(vec2, vec3), 0)

    # The dot-product should be non-zero when there are overlapping elements
    self.assertNotEqual(np.dot(vec4, vec5), 0)

    # The output is inspected in consecutive blocks of n_elem_feat entries
    n_elem_feat = desc.get_number_of_element_features()
    h_block = slice(0, n_elem_feat)
    hc_block = slice(1 * n_elem_feat, 2 * n_elem_feat)
    co_block = slice(4 * n_elem_feat, 5 * n_elem_feat)

    # Check that self-terms are in correct location
    self.assertNotEqual(np.sum(vec1[h_block]), 0)
    self.assertEqual(np.sum(vec2[h_block]), 0)
    self.assertNotEqual(np.sum(vec4[h_block]), 0)

    # Check that cross terms are in correct location
    self.assertEqual(np.sum(vec1[hc_block]), 0)
    self.assertNotEqual(np.sum(vec4[hc_block]), 0)
    self.assertEqual(np.sum(vec6[co_block]), 0)
    self.assertNotEqual(np.sum(vec7[co_block]), 0)
def test_crossover(self):
    """Tests that disabling/enabling crossover works as expected.

    For both radial bases the H-H and O-O slices of the output must be
    unchanged when crossover is switched off, while the O-O slice must move
    to a different location.
    """
    pos = [[0.1, 0.1, 0.1]]
    species = [1, 8]
    nmax = 5
    lmax = 5

    # The same checks apply to both radial bases, so they share one loop.
    for rbf in ("gto", "polynomial"):
        desc = SOAP(
            species=species,
            rbf=rbf,
            crossover=True,
            rcut=3,
            nmax=nmax,
            lmax=lmax,
            periodic=False,
        )

        # Locations and output with the cross-species terms included
        hh_loc_full = desc.get_location(("H", "H"))
        oo_loc_full = desc.get_location(("O", "O"))
        full_output = desc.create(H2O, positions=pos)

        # Locations and output with the cross-species terms left out
        desc.crossover = False
        hh_loc = desc.get_location(("H", "H"))
        oo_loc = desc.get_location(("O", "O"))
        partial_output = desc.create(H2O, positions=pos)

        # The rbf name is passed as the message to identify a failing basis
        self.assertTrue(oo_loc_full != oo_loc, rbf)
        self.assertTrue(
            np.array_equal(full_output[:, hh_loc_full], partial_output[:, hh_loc]),
            rbf,
        )
        self.assertTrue(
            np.array_equal(full_output[:, oo_loc_full], partial_output[:, oo_loc]),
            rbf,
        )
def test_soap(version):
    """Tests how the SOAP descriptor calculation scales with system size.

    Times ``SOAP.create`` on NiTi supercells of increasing size for both
    radial bases and saves the resulting plot as
    ``soap_scaling_<version>.pdf``.

    Args:
        version: Label used in the plot title and in the output file name.
    """
    nmax = 4
    lmax = 4
    # Lattice constant of the two-atom NiTi cell; constant for every run
    a = 2.993

    fig = mpl.figure(figsize=[9, 7])
    ax = fig.add_subplot(111)
    ax.set_title("SOAP nmax={}, lmax={}, version={}".format(
        nmax, lmax, version))
    ax.set_xlabel("Number of atoms")
    ax.set_ylabel("Time (s)")

    for rbf in ["gto", "polynomial"]:
        N = []
        t = []

        # Loop over different system sizes
        for ncells in tqdm(range(5, 20)):
            # Two atoms per cell, replicated ncells times in each direction
            natoms = 2 * ncells**3
            soap_generator = SOAP(
                rcut=3.0,
                nmax=nmax,
                lmax=lmax,
                species=["Ni", "Ti"],
                rbf=rbf,
                crossover=True,
                periodic=True,
            )

            niti = Atoms(
                "NiTi",
                positions=[[0.0, 0.0, 0.0], [a / 2, a / 2, a / 2]],
                cell=[a, a, a],
                pbc=[1, 1, 1],
            )

            # Replicate system
            niti = niti * ncells

            # Only the descriptor evaluation itself is timed
            t0 = time()
            soap_generator.create(niti)
            t1 = time()

            N.append(natoms)
            t.append(t1 - t0)

        N = np.array(N)
        t = np.array(t)

        ax.plot(N, t, "o--", label="{}".format(rbf))

    mpl.legend()
    mpl.savefig("soap_scaling_{}.pdf".format(version))
def test_sparse(self):
    """Tests the sparse matrix creation."""
    # The ``sparse`` flag selects the exact container type of the output
    expected_types = ((False, np.ndarray), (True, sparse.COO))
    for use_sparse, expected in expected_types:
        desc = SOAP(
            species=[1, 8],
            rcut=5,
            nmax=5,
            lmax=5,
            periodic=True,
            sparse=use_sparse,
        )
        vec = desc.create(H2O)
        # Exact type match (subclasses are not accepted)
        self.assertIs(type(vec), expected)
def test_sparse(self):
    """Tests the sparse matrix creation."""
    # The ``sparse`` flag selects the exact container type of the output
    expected_types = ((False, np.ndarray), (True, scipy.sparse.coo_matrix))
    for use_sparse, expected in expected_types:
        desc = SOAP(
            atomic_numbers=[1, 8],
            rcut=5,
            nmax=5,
            lmax=5,
            periodic=True,
            sparse=use_sparse,
        )
        vec = desc.create(H2O)
        # Exact type match (subclasses are not accepted)
        self.assertIs(type(vec), expected)
def _featurize(self, struct: PymatgenStructure) -> np.ndarray:
    """Calculate SOAP descriptor from pymatgen structure.

    Parameters
    ----------
    struct: pymatgen.Structure
      A periodic crystal composed of a lattice and a sequence of atomic
      sites with 3D coordinates and elements.

    Returns
    -------
    features: np.ndarray
      soap descriptor
    """
    descriptor = SOAP(
        periodic=self.periodic,
        species=self.species,
        rcut=self.rcut,
        nmax=self.nmax,
        lmax=self.lmax,
        rbf=self.rbf,
        sigma=self.sigma,
        average=self.average,
    )

    # When ``self.convert`` is set the structure goes through AseAtomsAdaptor
    # first; otherwise it is handed to the descriptor unchanged.
    atoms = AseAtomsAdaptor().get_atoms(struct) if self.convert else struct

    return np.asarray(descriptor.create(atoms))
def test_metrics(self):
    """Tests that different metrics as defined by scikit-learn can be used.

    This is a smoke test: each kernel only has to be created and evaluated
    without raising.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    water_features = soap.create(molecule("H2O"))

    metric_settings = (
        {"metric": "linear"},  # Linear dot-product kernel
        {"metric": "rbf", "gamma": 1},  # Gaussian kernel
        {"metric": "laplacian", "gamma": 1},  # Laplacian kernel
    )
    for settings in metric_settings:
        kernel = AverageKernel(**settings)
        kernel.create([water_features, water_features])
def test_convergence_infinity(self):
    """Tests that the REMatch kernel correctly converges to the average
    kernel at the limit of infinite alpha.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    structures = [molecule("H2O"), molecule("H2O2")]
    features = [soap.create(structure) for structure in structures]

    # REMatch kernel with very high alpha
    rematch_matrix = REMatchKernel(
        metric="linear", alpha=1e20, threshold=1e-6
    ).create(features)

    # Average kernel
    average_matrix = AverageKernel(metric="linear").create(features)

    # Test approximate equality
    self.assertTrue(np.allclose(rematch_matrix, average_matrix))
def create_soap(nmax, lmax, rcut, procs):
    """Compute SOAP descriptors and write them to disk with ``write_pickle``.

    The structure (``struc``), the evaluation positions (``grid``) and the
    output prefix (``path``) are read from module-level names. Progress and
    timings are printed to stdout.

    Args:
        nmax: Forwarded to ``SOAP`` as ``nmax``.
        lmax: Forwarded to ``SOAP`` as ``lmax``.
        rcut: Forwarded to ``SOAP`` as ``rcut``.
        procs: Forwarded to ``SOAP.create`` as ``n_jobs``.
    """
    descriptor = SOAP(
        species=["Au"],
        periodic=False,
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
    )

    print("Calculating SOAPs on", procs, "procs")
    print("nmax, lmax, rcut =", nmax, lmax, rcut)

    began = time.time()
    soap_struc = descriptor.create(struc, grid, n_jobs=procs)
    print("DONE in ", time.time() - began, "\n")

    print("Shape of SOAPs:", soap_struc.shape)
    print("Writing SOAPs to disk...")

    began = time.time()
    # The output name encodes every parameter of this run
    suffix = (
        "soap_n_" + str(nmax)
        + "_l_" + str(lmax)
        + "_r_" + str(rcut)
        + "_p_" + str(procs)
    )
    write_pickle(soap_struc, path + suffix)
    print("DONE in ", time.time() - began, "\n")

    print("ALL DONE")
def test_get_location_w_crossover(self):
    """Tests that ``get_location`` returns the correct output slices when
    crossover is enabled.
    """
    # With crossover
    species = ["H", "O", "C"]
    desc = SOAP(
        species=species,
        rbf="gto",
        crossover=True,
        rcut=3,
        nmax=5,
        lmax=5,
        periodic=False,
    )

    # Locations looked up by chemical symbols
    loc_hh = desc.get_location(("H", "H"))
    loc_ho = desc.get_location(("H", "O"))
    loc_oh = desc.get_location(("O", "H"))
    loc_oo = desc.get_location(("O", "O"))
    loc_cc = desc.get_location(("C", "C"))
    loc_co = desc.get_location(("C", "O"))
    loc_ch = desc.get_location(("C", "H"))

    # Undefined elements
    for undefined_pair in ((2, 1), ("He", "H")):
        with self.assertRaises(ValueError):
            desc.get_location(undefined_pair)

    # Outputs used to check that slices are correctly empty or filled
    co2_out = desc.create(molecule("CO2"))
    h2o_out = desc.create(molecule("H2O"))

    # Check that slices with reversed atomic numbers are identical
    self.assertTrue(loc_ho == loc_oh)

    def check_slice(output, location, filled):
        # A slice is "filled" when its entries do not sum to zero
        total = output[:, location].sum()
        self.assertTrue(total != 0 if filled else total == 0)

    # (location, filled in CO2, filled in H2O)
    expectations = [
        (loc_hh, False, True),   # H-H
        (loc_ch, False, False),  # H-C
        (loc_ho, False, True),   # H-O
        (loc_co, True, False),   # C-O
        (loc_cc, True, False),   # C-C
        (loc_oo, True, True),    # O-O
    ]
    for location, in_co2, in_h2o in expectations:
        check_slice(co2_out, location, in_co2)
        check_slice(h2o_out, location, in_h2o)
class SOAPConverter(BaseEstimator, TransformerMixin):
    """Compute the SOAP descriptors for molecules"""

    def __init__(self, rcut: float = 6, nmax: int = 8, lmax: int = 6,
                 species=frozenset({'C', 'O', 'H', 'N', 'F'})):
        """Initialize the converter

        Args:
            rcut (float): Cutoff radius, forwarded to ``SOAP`` as ``rcut``
            nmax (int): Forwarded to ``SOAP`` as ``nmax``
            lmax (int): Forwarded to ``SOAP`` as ``lmax``
            species (Iterable): List of elements to include in potential
        """
        super().__init__()

        # scikit-learn reads constructor arguments back from attributes with
        # the same names (get_params / clone), so store them unmodified.
        self.rcut = rcut
        self.nmax = nmax
        self.lmax = lmax
        self.species = species

        # The descriptor is built once here from the initial arguments.
        # NOTE(review): it is not rebuilt if the attributes above are changed
        # later (e.g. through set_params).
        self.soap = SOAP(rcut=rcut, nmax=nmax, lmax=lmax, species=sorted(species))

    def fit(self, X, y=None):
        """No-op fit; returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a list with the SOAP output of every entry in ``X``."""
        return [self.soap.create(x) for x in X]
def getSOAPs(geometries, species, rcut, sigma, nmax=10, lmax=9,
             periodic=True, crossover=True, sparse=False):
    """
    Takes a Series of geometries with one He present,
        returns SOAP representation of the chemical environment of He for each item
    Assumes any given structure in ``geometries`` has the same collection of elements
        as all the other structures
    Assumes any given structure in ``geometries`` has the same number of atoms as all
        the other structures
    Assumes the He atom is the last atom of every structure

    Input:
        geometries: Series of Atoms objects; each must contain exactly 1 He atom
        species: species passed to the SOAP descriptor
        rcut, nmax, lmax, sigma, periodic, crossover, sparse: SOAP parameters
    Output:
        output: Series of SOAP matrices, each corresponding to the appropriate index
    """
    # Set up descriptor
    desc = SOAP(species=species, rcut=rcut, nmax=nmax, lmax=lmax,
                sigma=sigma, periodic=periodic, crossover=crossover,
                sparse=sparse)

    # Apply descriptor. ``Series.items`` is used because ``iteritems`` was
    # removed in pandas 2.0.
    soaps = {}
    for i, geom in geometries.items():
        HeLoc = len(geom) - 1  # assume He atom is last one in Atoms list
        # Normalized SOAP representation centred on the He atom only
        tempSOAP = preprocessing.normalize(
            desc.create(geom, positions=[HeLoc], n_jobs=4))
        soaps[i] = tempSOAP[0]

    return pd.Series(soaps, name='SOAP')
def createDescriptorsAllSOAP(data, species, sigma_SOAP, cutoff_SOAP, nmax_SOAP, lmax_SOAP, periodic, sparse_SOAP=default_sparse_SOAP):
    """Compute SOAP descriptors for every structure in ``data``, grouped by atom.

    Args:
        data: Indexable sequence of structures. The number of atoms is taken
            from the first entry.
        species, sigma_SOAP, cutoff_SOAP, nmax_SOAP, lmax_SOAP, periodic,
        sparse_SOAP: Forwarded to the ``SOAP`` constructor.

    Returns:
        list: One entry per atom, each an object-dtype array of shape
        ``(n_steps, n_features)``.
    """
    # Initialize SOAP
    descriptor = SOAP(
        species=species,
        sigma=sigma_SOAP,
        periodic=periodic,
        rcut=cutoff_SOAP,
        nmax=nmax_SOAP,
        lmax=lmax_SOAP,
        sparse=sparse_SOAP,
    )

    # Dimensions of the output
    n_features = descriptor.get_number_of_features()
    n_atoms = np.shape(data[0])[0]
    n_steps = len(data)

    # Fill one (n_atoms, n_features) slab per structure
    per_step = np.empty((n_atoms, n_steps, n_features), dtype=object)
    for step in tqdm.tqdm(range(n_steps)):
        per_step[:, step, :] = descriptor.create(data[step])

    # Regroup so that each list entry holds all steps of a single atom
    return [per_step[atom, :, :] for atom in range(n_atoms)]
def test_poly_integration(self):
    """Tests that the partial power spectrum with the polynomial basis done
    with C corresponds to the easier-to-code but less performant
    integration done with python.
    """
    system, centers, args = get_soap_polynomial_lmax_setup()

    # Mostly analytical power spectrum (radial part is integrated
    # numerically) from the descriptor itself
    descriptor = SOAP(**args, rbf="polynomial", dtype="float64")
    analytical_power_spectrum = descriptor.create(system, positions=centers)

    # Numerical power spectrum built from the stored coefficients
    numerical_power_spectrum = self.get_power_spectrum(
        load_polynomial_coefficients(args),
        crossover=args["crossover"],
    )

    spectra_agree = np.allclose(
        numerical_power_spectrum,
        analytical_power_spectrum,
        atol=1e-15,
        rtol=0.01,
    )
    self.assertTrue(spectra_agree)
def extract_descriptor(rows):
    """Build averaged SOAP descriptors and targets from database rows.

    For every row the Au atoms are replaced by Fe, the SOAP output is
    evaluated at the indices of the former Au atoms and averaged over them.
    ``rcut``, ``nmax`` and ``predict_item`` are read from module-level names.

    Args:
        rows: Iterable of rows providing ``toatoms()`` and ``data``.

    Returns:
        tuple: ``(soaps, targets)`` where ``soaps`` holds one averaged
        descriptor per row and ``targets`` one single-element list per row.
    """
    soaps, targets = [], []
    for row in rows:
        source_atoms = row.toatoms()

        # Empty structure with the same cell and periodicity as the source
        substituted = Atoms()
        substituted.set_cell(source_atoms.get_cell())
        substituted.set_pbc(source_atoms.get_pbc())

        # Copy every atom, turning Au into Fe and remembering where Au was
        gold_indices = []
        for index, atom in enumerate(source_atoms):
            was_gold = atom.symbol == 'Au'
            symbol = 'Fe' if was_gold else atom.symbol
            substituted.append(Atom(symbol, atom.position))
            if was_gold:
                gold_indices.append(index)

        species = list({atom.symbol for atom in substituted})

        # NOTE(review): lmax is set from nmax here — confirm this is intended
        periodic_soap = SOAP(
            species=species,
            rcut=rcut,
            nmax=nmax,
            lmax=nmax,
            periodic=True,
            sparse=False,
        )

        # Descriptor at the former Au sites, averaged over those sites
        soap_crystal = periodic_soap.create(substituted, positions=gold_indices)
        soaps.append(np.mean(soap_crystal, axis=0))
        targets.append([row.data[predict_item]])

    return soaps, targets
def test_metrics(self):
    """Tests that different metrics as defined by scikit-learn can be used.

    This is a smoke test: each kernel only has to be created and evaluated
    without raising.
    """
    soap = SOAP(
        [1, 8],
        5.0,
        2,
        2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    water_features = soap.create(molecule('H2O'))

    metric_settings = (
        {"metric": "linear"},  # Linear dot-product kernel
        {"metric": "rbf", "gamma": 1},  # Gaussian kernel
        {"metric": "laplacian", "gamma": 1},  # Laplacian kernel
    )
    for settings in metric_settings:
        kernel = REMatchKernel(alpha=0.1, threshold=1e-6, **settings)
        kernel.create([water_features, water_features])
def update_soap_analysis(struct, all_kwargs):
    """Recompute the normalized SOAP vector for ``struct`` and return its graph.

    Raises ``PreventUpdate`` when no structure is supplied. ``all_kwargs`` is
    not read here; the settings are reconstructed from
    ``callback_context.inputs`` instead.
    """
    if not struct:
        raise PreventUpdate

    structure = self.from_data(struct)
    kwargs = self.reconstruct_kwargs_from_state(callback_context.inputs)

    # TODO: make sure is_int kwarg information is enforced so that int() conversion is unnecessary
    atomic_numbers = [element.number for element in structure.composition.elements]
    descriptor = SOAP(
        species=atomic_numbers,
        sigma=kwargs["sigma"],
        rcut=kwargs["rcut"],
        nmax=int(kwargs["nmax"]),
        lmax=int(kwargs["lmax"]),
        periodic=True,
        crossover=kwargs["crossover"],
        sparse=False,
        average=kwargs["average"],
    )

    # The descriptor is evaluated on the converted structure using every
    # available CPU
    atoms = AseAtomsAdaptor().get_atoms(structure)
    raw_feature = descriptor.create(atoms, n_jobs=cpu_count())
    feature = normalize(raw_feature)

    return _get_soap_graph(feature, "SOAP vector for this material")
class Atomic_Descriptor_SOAP(Atomic_Descriptor_Base):
    """Atomic descriptor backed by a DScribe ``SOAP`` object."""

    def __init__(self, desc_spec):
        """
        make a DScribe SOAP object

        Args:
            desc_spec (dict): Descriptor specification. ``"type"`` must be
                ``"SOAP"``. Required keys: ``"species"``, ``"cutoff"``,
                ``"atom_gaussian_width"``, ``"n"`` and ``"l"``. Optional
                keys: ``"rbf"`` (default ``'gto'``), ``"crossover"`` (default
                ``False``) and ``"periodic"`` (default ``True``).

        Raises:
            ValueError: If the type is missing or not ``"SOAP"``, or if a
                required key is missing.
        """
        from dscribe.descriptors import SOAP

        if desc_spec.get("type") != "SOAP":
            raise ValueError(
                "Type is not SOAP or cannot find the type of the descriptor")

        # required
        try:
            self.species = desc_spec['species']
            self.cutoff = desc_spec['cutoff']
            self.g = desc_spec['atom_gaussian_width']
            self.n = desc_spec['n']
            self.l = desc_spec['l']
        except KeyError as err:
            # Only a missing key is translated; anything else propagates
            raise ValueError(
                "Not enough information to intialize the `Atomic_Descriptor_SOAP` object"
            ) from err

        # we have defaults here
        self.rbf = desc_spec.get('rbf', 'gto')
        self.crossover = bool(desc_spec.get('crossover', False))
        self.periodic = bool(desc_spec.get('periodic', True))

        self.soap = SOAP(species=self.species,
                         rcut=self.cutoff,
                         nmax=self.n,
                         lmax=self.l,
                         sigma=self.g,
                         rbf=self.rbf,
                         crossover=self.crossover,
                         average='off',
                         periodic=self.periodic)

        print("Using SOAP Descriptors ...")

        # make an acronym
        self.acronym = "SOAP-n" + str(self.n) + "-l" + str(
            self.l) + "-c" + str(self.cutoff) + "-g" + str(self.g)

    def create(self, frame):
        """Return ``(acronym, descriptor)`` for ``frame``."""
        # notice that we return the acronym here!!!
        return self.acronym, self.soap.create(frame, n_jobs=1)
def calc_soap_dscribe(atoms, parameters, atomic_numbers=None, periodic=True, sparse=False, rbf="polynomial"):
    """
    Calculate the SOAP vector for the structure.

    Args
    ----
    atoms:
        Structure to describe; must provide ``get_atomic_numbers()`` when
        ``atomic_numbers`` is None.

    parameters: list
        SOAP parameters (r, sigma, n, l, zeta).
            r: radial cut-off.
            sigma: broadness of the Gaussian functions (smoothness).
            n: order to which the radial basis set is expanded to.
            l: order to which the angular basis set is expanded to.
            zeta: power to which the normalised SOAP kernel is raised
                (unpacked but not used here).

    atomic_numbers: list
        Atomic numbers to include in the SOAP analysis. All elements
        that will be encountered need to be included. If None, will
        just include all elements in the structure. Note, undesirable
        behaviour may occur if comparing structures with different
        species if not all elements are included for both structures.

    periodic: bool
        Whether to construct a periodic SOAP.

    sparse:
        Forwarded to the ``SOAP`` constructor as ``sparse``.

    rbf: str
        Radial basis function to use ("polynomial" or DScribe's
        custom "gto" basis set).
    """
    from dscribe.descriptors import SOAP

    # Fall back to every element present in the structure
    if atomic_numbers is None:
        atomic_numbers = atoms.get_atomic_numbers()

    # zeta is part of the parameter tuple but plays no role in the descriptor
    radial_cutoff, gaussian_width, n_radial, l_angular, _ = parameters

    descriptor = SOAP(
        species=np.unique(atomic_numbers),
        rcut=radial_cutoff,
        sigma=gaussian_width,
        nmax=n_radial,
        lmax=l_angular,
        periodic=periodic,
        sparse=sparse,
        rbf=rbf,
    )
    return descriptor.create(atoms)
def test_is_periodic(self):
    """Tests whether periodic images are seen by the descriptor"""
    # Work on a copy: set_pbc/set_cell below would otherwise modify the
    # shared module-level H2O fixture and leak into other tests.
    system = H2O.copy()

    desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=False, crossover=True,)

    # Non-periodic evaluation; the call only has to succeed
    system.set_pbc(False)
    desc.create(system, positions=[[0, 0, 0]])

    # Periodic evaluation in a small cubic cell
    system.set_pbc(True)
    system.set_cell([
        [2.0, 0.0, 0.0],
        [0.0, 2.0, 0.0],
        [0.0, 0.0, 2.0]
    ])
    desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=True, crossover=True,)

    cubic_cell = desc.create(system, positions=[[0, 0, 0]])

    self.assertTrue(np.sum(cubic_cell) > 0)