def test_difference(self):
    """Checks the kernel for identical, unrelated and related molecules.

    Identical inputs must give entries within 1e-3 of one, N2 against H2O
    must give the 2x2 identity matrix within 1e-3, and H2O against H2O2
    must give an off-diagonal entry above 0.9.
    """
    tolerance = 1e-3
    soap = SOAP(
        species=[1, 6, 7, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )

    # Same molecule twice: every kernel entry should be one.
    water = molecule("H2O")
    water_features = soap.create(water)
    average = AverageKernel(metric="linear")
    similarity = average.create([water_features, water_features])
    self.assertTrue((np.abs(similarity - 1) < tolerance).all())

    # N2 against H2O: only the diagonal should be non-zero.
    nitrogen = molecule("N2")
    water = molecule("H2O")
    nitrogen_features = soap.create(nitrogen)
    water_features = soap.create(water)
    similarity = average.create([nitrogen_features, water_features])
    self.assertTrue((np.abs(similarity - np.eye(2)) < tolerance).all())

    # H2O against H2O2: the cross term should be high.
    water = molecule("H2O")
    peroxide = molecule("H2O2")
    water_features = soap.create(water)
    peroxide_features = soap.create(peroxide)
    similarity = average.create([water_features, peroxide_features])
    self.assertTrue(similarity[0, 1] > 0.9)
def test_metrics(self):
    """Smoke test: kernels can be built with several scikit-learn metrics.

    Nothing is asserted on the values; the test passes as long as every
    kernel can be constructed and evaluated without raising.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    water_features = soap.create(molecule("H2O"))

    # Linear dot-product, Gaussian and Laplacian kernels, in that order.
    kernel_options = (
        {"metric": "linear"},
        {"metric": "rbf", "gamma": 1},
        {"metric": "laplacian", "gamma": 1},
    )
    for options in kernel_options:
        AverageKernel(**options).create([water_features, water_features])
def test_xy(self):
    """Checks the kernel between two different sets of structures.

    Kernel-based prediction needs the similarity between one set and a
    second set; here two structures against one must give a (2, 1) matrix.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    water_feat, oxygen_feat, peroxide_feat = (
        soap.create(molecule(formula)) for formula in ("H2O", "O2", "H2O2")
    )

    # Linear dot-product kernel between {H2O, O2} and {H2O2}.
    linear = AverageKernel(metric="linear")
    cross = linear.create([water_feat, oxygen_feat], [peroxide_feat])
    self.assertEqual(cross.shape, (2, 1))
def test_convergence_infinity(self):
    """Checks that REMatch matches the average kernel for very large alpha.

    A REMatch kernel built with alpha=1e20 is compared against the average
    kernel on the same features using ``np.allclose``.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=False,
    )
    water_features = soap.create(molecule("H2O"))
    peroxide_features = soap.create(molecule("H2O2"))

    # REMatch kernel with a very high alpha.
    rematch = REMatchKernel(metric="linear", alpha=1e20, threshold=1e-6)
    rematch_matrix = rematch.create([water_features, peroxide_features])

    # Reference: plain average kernel.
    average = AverageKernel(metric="linear")
    average_matrix = average.create([water_features, peroxide_features])

    # Both matrices must agree within np.allclose's default tolerances.
    self.assertTrue(np.allclose(rematch_matrix, average_matrix))
def average_listcomp(desc_list):
    """Compare every descriptor with its successor using a linear average kernel.

    For each consecutive pair, the second descriptor is cut down to as many
    columns as the first row of the first descriptor has, the pair is passed
    to ``AverageKernel.create``, and entry ``[0][1]`` of the result is kept.
    The sizes of both members and a running comparison count are printed.

    Args:
        desc_list: Sequence of descriptors supporting ``len``, row indexing
            and ``[:, a:b]`` slicing (presumably 2D NumPy arrays; confirm
            against the caller).

    Returns:
        list: One kernel entry per consecutive pair, in input order; empty
        when fewer than two descriptors are given.
    """
    linear_kernel = AverageKernel(metric='linear')
    similarities = []
    neighbours = zip(desc_list, desc_list[1:])
    for done, (current, following) in enumerate(neighbours, start=1):
        # Trim the successor so both members have the same number of columns.
        width = len(current[0])
        trimmed = following[:, 0:width]
        print([len(current), len(trimmed)])
        print([len(current[0]), len(trimmed[0])])
        pair_kernel = linear_kernel.create([current, trimmed])
        similarities.append(pair_kernel[0][1])
        print(f'done {done} comparisons')
    return similarities
def test_sparse(self):
    """Smoke test: sparse SOAP output is accepted as kernel input.

    Nothing is asserted on the result; the test passes as long as the
    kernel can be evaluated on features created with ``sparse=True``.
    """
    soap = SOAP(
        species=[1, 8],
        rcut=5.0,
        nmax=2,
        lmax=2,
        sigma=0.2,
        periodic=False,
        crossover=True,
        sparse=True,
    )
    sparse_features = soap.create(molecule('H2O'))
    AverageKernel(metric="linear").create([sparse_features])
def _as_bool(flag):
    """Interpret a flag that may arrive as a real bool or as text.

    Strings spelling a negative ("False", "no", "0", "none", "off", ...) and
    the empty string map to False; every other string maps to True. Any
    non-string value goes through the ordinary ``bool`` conversion.
    """
    if isinstance(flag, str):
        return flag.strip().lower() not in {"", "false", "f", "no", "n", "0", "none", "off"}
    return bool(flag)


def main(fxyz, dictxyz, prefix, soap_rcut, soap_g, soap_n, soap_l, soap_periodic, matrix_plot):
    """
    Generate the SOAP kernel matrix.

    Parameters
    ----------
    fxyz: string giving location of xyz file, or 'none' to skip it
    dictxyz: string giving location of xyz file that is used as a dictionary,
        or 'none' to skip it
    prefix: string giving the filename prefix
    soap_rcut: float giving the cutoff radius
    soap_g: float giving the atom width
    soap_n: int giving the maximum radial label
    soap_l: int giving the maximum angular label. Must be less than or equal to 9
    soap_periodic: bool or string (True or False) indicating whether the system is periodic
    matrix_plot: bool or string (True or False) indicating whether a plot of the kernel
        matrix is to be generated

    Raises
    ------
    ValueError
        If neither file contributes any frame.
    """
    # bool("False") is True, so textual flags have to be parsed explicitly.
    soap_periodic = _as_bool(soap_periodic)
    matrix_plot = _as_bool(matrix_plot)

    fframes = []
    dictframes = []

    # read frames
    if fxyz != 'none':
        fframes = read(fxyz, ':')
        print("read xyz file:", fxyz, ", a total of", len(fframes), "frames")

    # read frames in the dictionary
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        print("read xyz file used for a dictionary:", dictxyz, ", a total of", len(dictframes), "frames")

    # dictionary frames come first in the kernel matrix ordering
    frames = dictframes + fframes
    nframes = len(frames)
    if nframes == 0:
        raise ValueError("no frames to process: both fxyz and dictxyz are empty or 'none'")

    global_species = []
    for frame in frames:
        global_species.extend(frame.get_atomic_numbers())
        if not soap_periodic:
            frame.set_pbc([False, False, False])
    global_species = np.unique(global_species)
    print("a total of", nframes, "frames, with elements: ", global_species)

    # set up the soap descriptors: with several frames the descriptor is
    # averaged per frame; with a single frame it is not averaged, so the
    # kernel matrix (kmat) is computed between the atomic environments
    # within this frame
    soap_desc = SOAP(species=global_species, rcut=soap_rcut, nmax=soap_n, lmax=soap_l,
                     sigma=soap_g, crossover=False, average=nframes > 1,
                     periodic=soap_periodic)

    # compute soap finger prints
    fall = soap_desc.create(frames, n_jobs=8)

    # compute kmat: every row of fall becomes its own one-environment entry
    fshape = np.shape(fall)
    average_kernel = AverageKernel(metric="linear")
    kNN = average_kernel.create(fall.reshape((fshape[0], 1, fshape[1])))

    # save
    np.savetxt(f"{prefix}-n{soap_n}-l{soap_l}-c{soap_rcut}-g{soap_g}.kmat", kNN, fmt='%4.8f')

    # plot
    if matrix_plot:
        plt.matshow(kNN)
        plt.title('Kernel matrix: ' + prefix)
        plt.show()
from dscribe.descriptors import SOAP
from dscribe.kernels import AverageKernel
from ase.build import molecule

# The two molecules to compare: water and hydrogen peroxide.
a, b = molecule("H2O"), molecule("H2O2")

# The kernel works on features of atomic environments; SOAP provides them.
desc = SOAP(
    species=[1, 6, 7, 8],
    rcut=5.0,
    nmax=2,
    lmax=2,
    sigma=0.2,
    periodic=False,
    crossover=True,
    sparse=False,
)
a_features, b_features = (desc.create(system) for system in (a, b))

# Average kernel with a linear metric. The result is the full similarity
# matrix of the given structures.
re = AverageKernel(metric="linear")
re_kernel = re.create([a_features, b_features])

# Other scikit-learn metrics can be used the same way, e.g. a Gaussian.
re = AverageKernel(metric="rbf", gamma=1)
re_kernel = re.create([a_features, b_features])
rcut=r_cut, nmax=nmax, lmax=lmax, periodic=True, sparse=False) #--------------------------------------------------------------------- #RUN SOAP ACROSS n FILES IN LIST AND OUTPUT COMPARISON KERNEL #--------------------------------------------------------------------- tic_1 = time.perf_counter() comparisons = [t2_per_soap.create(i) for i in structures] metric = "linear" re = AverageKernel(metric=metric) kern = re.create(comparisons) toc_1 = time.perf_counter() comp_time = toc_1 - tic_1 print( f"Took {comp_time:.2} seconds to compare {ns} structures with r_cut = {r_cut:.2}, lmax = {lmax}, nmax = {nmax}" ) #--------------------------------------------------------------------- #OUTPUT COMPARISON AS CSV FILE #--------------------------------------------------------------------- soap_array = pd.DataFrame(kern, index=names, columns=names) soap_array.to_csv(outputdir + "/soap_comparison_rcut = %s.csv" % r_cut, index=True,
allkerndiffs = []
for nmax in range(1, 15):
    # Rebuild the descriptor for each nmax from 1 to 14.
    soapgen_rcut = SOAP(species=species,
                        rcut=rcut,
                        nmax=nmax,
                        lmax=lmax,
                        periodic=True,
                        sparse=False,
                        rbf='gto')
    descriptors = [soapgen_rcut.create(i) for i in structures]

    # Difference of the first feature of the first environment between the
    # first two structures.
    descdiffs.append(descriptors[1][0][0] - descriptors[0][0][0])

    # Time the average kernel (construction plus evaluation).
    tic_1 = time.perf_counter()
    re = AverageKernel(metric='linear')
    kern = re.create(descriptors)
    toc_1 = time.perf_counter()
    ctime.append(toc_1 - tic_1)
    kerndiffs.append(kern[0][1])

    # Time the REMatch kernel, including the normalisation step.
    tic_2 = time.perf_counter()
    # NOTE(review): normalize is presumably sklearn.preprocessing.normalize;
    # confirm against the file's imports.
    normed = [normalize(i) for i in descriptors]
    rem = REMatchKernel(metric='rbf', gamma=1, alpha=1, threshold=1e-6)
    # Feed the normalised features to the kernel; previously they were
    # computed but the raw descriptors were passed instead.
    remkern = rem.create(normed)
    toc_2 = time.perf_counter()
    rectime.append(toc_2 - tic_2)
    remkerndiffs.append(remkern[0][1])

    # Absolute gap between the two kernels for the first pair of structures.
    allkerndiffs.append(abs(remkern[0][1] - kern[0][1]))

plt.plot(xax, ctime, label='Average Kernel')