Example #1
0
    def test_difference(self):
        """Checks the average kernel for identical, unrelated and related
        molecules using a linear metric on SOAP features.
        """
        soap = SOAP(
            species=[1, 6, 7, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        kernel = AverageKernel(metric="linear")

        def featurize(name):
            """Builds the named molecule and returns its SOAP features."""
            return soap.create(molecule(name))

        # The same features on both sides: every entry is expected to be
        # within 1e-3 of one.
        water = featurize("H2O")
        K = kernel.create([water, water])
        self.assertTrue((np.abs(K - 1) < 1e-3).all())

        # N2 against H2O: the kernel matrix is expected to be within 1e-3 of
        # the 2x2 identity.
        K = kernel.create([featurize("N2"), featurize("H2O")])
        self.assertTrue((np.abs(K - np.eye(2)) < 1e-3).all())

        # H2O against H2O2: the off-diagonal similarity is expected to exceed
        # 0.9.
        K = kernel.create([featurize("H2O"), featurize("H2O2")])
        self.assertTrue(K[0, 1] > 0.9)
Example #2
0
    def test_metrics(self):
        """Checks that kernels can be built with several different metrics.

        Only successful construction and evaluation is exercised; the kernel
        values themselves are not inspected.
        """
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        features = soap.create(molecule("H2O"))

        # Keyword sets for the linear, Gaussian (rbf) and Laplacian kernels,
        # evaluated in that order.
        metric_settings = (
            {"metric": "linear"},
            {"metric": "rbf", "gamma": 1},
            {"metric": "laplacian", "gamma": 1},
        )
        for settings in metric_settings:
            kernel = AverageKernel(**settings)
            kernel.create([features, features])
Example #3
0
    def test_xy(self):
        """Checks that a kernel can be evaluated between two distinct sets of
        structures, as is needed when predicting with kernel-based methods.
        """
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )

        # Features for the first set (two molecules) and the second set (one
        # molecule).
        first_set = [soap.create(molecule(name)) for name in ("H2O", "O2")]
        second_set = [soap.create(molecule("H2O2"))]

        # Linear dot-product kernel between the two sets
        K = AverageKernel(metric="linear").create(first_set, second_set)

        # One row per structure in the first set, one column per structure in
        # the second.
        self.assertEqual(K.shape, (2, 1))
Example #4
0
    def test_convergence_infinity(self):
        """Checks that the REMatch kernel matches the average kernel in the
        limit of a very large alpha.
        """
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        features = [soap.create(molecule(name)) for name in ("H2O", "H2O2")]

        # REMatch kernel evaluated with a very high alpha
        rematch = REMatchKernel(metric="linear", alpha=1e20, threshold=1e-6)
        K_re = rematch.create(features)

        # Reference: plain average kernel on the same features
        average = AverageKernel(metric="linear")
        K_ave = average.create(features)

        # The two matrices should agree within numpy's default tolerances.
        self.assertTrue(np.allclose(K_re, K_ave))
Example #5
0
def average_listcomp(desc_list):
    """Compare each descriptor with its successor using a linear average kernel.

    For every consecutive pair in ``desc_list`` the second descriptor is
    truncated to the number of columns of the first descriptor's first row,
    the pair is passed to ``AverageKernel.create`` and the off-diagonal entry
    ``[0][1]`` of the result is collected. Progress and the lengths of each
    pair are printed along the way.

    Parameters
    ----------
    desc_list: sequence of 2D descriptor arrays (assumed to be
        (environments, features) -- TODO confirm against the caller)

    Returns
    -------
    list with one similarity value per consecutive pair
    """
    kernel = AverageKernel(metric='linear')
    similarities = []

    neighbours = zip(desc_list, desc_list[1:])
    for done, (current, following) in enumerate(neighbours, start=1):
        # Trim the successor so that both descriptors have the same width.
        width = len(current[0])
        trimmed = following[:, 0:width]

        print([len(current), len(trimmed)])
        print([len(current[0]), len(trimmed[0])])

        pair_kernel = kernel.create([current, trimmed])
        similarities.append(pair_kernel[0][1])
        print(f'done {done} comparisons')

    return similarities
Example #6
0
 def test_sparse(self):
     """Checks that a kernel can be built from sparse SOAP features.

     Only successful evaluation is exercised; the result is not inspected.
     """
     # SOAP descriptor configured with sparse=True
     soap = SOAP(
         species=[1, 8],
         rcut=5.0,
         nmax=2,
         lmax=2,
         sigma=0.2,
         periodic=False,
         crossover=True,
         sparse=True,
     )
     water_features = soap.create(molecule('H2O'))

     # A single-structure list is enough to run the kernel on these features.
     AverageKernel(metric="linear").create([water_features])
Example #7
0
def _as_bool(value):
    """Interpret a flag that may be a real bool or a string such as 'False'."""
    if isinstance(value, str):
        # bool("False") is True, so textual negatives must be spotted
        # explicitly. Every other string keeps its usual truthiness.
        return value.strip().lower() not in ("", "false", "f", "no", "n", "0", "off")
    return bool(value)


def main(fxyz, dictxyz, prefix, soap_rcut, soap_g, soap_n, soap_l,
         soap_periodic, matrix_plot):
    """

    Generate the SOAP kernel matrix.

    Frames are read from ``dictxyz`` and ``fxyz`` (dictionary frames first),
    SOAP features are computed for all of them, a linear average kernel is
    evaluated, and the matrix is written to
    ``<prefix>-n<soap_n>-l<soap_l>-c<soap_rcut>-g<soap_g>.kmat``.

    With more than one frame the SOAP features are averaged per frame, so the
    matrix compares frames. With a single frame the features are kept per
    atomic environment, so the matrix compares the environments inside it.

    Parameters
    ----------
    fxyz: string giving location of xyz file, or 'none' to skip it
    dictxyz: string giving location of xyz file that is used as a dictionary,
             or 'none' to skip it
    prefix: string giving the filename prefix
    soap_rcut: float giving the cutoff radius
    soap_g: float giving the atom width
    soap_n: int giving the maximum radial label
    soap_l: int giving the maximum angular label. Must be less than or equal to 9
    soap_periodic: bool or string (True or False) indicating whether the system is periodic
    matrix_plot: bool or string (True or False) indicating whether a plot of the kernel matrix
                 is to be generated
    """

    # Both flags may arrive as text; a plain bool() would turn "False" into
    # True.
    soap_periodic = _as_bool(soap_periodic)
    matrix_plot = _as_bool(matrix_plot)
    fframes = []
    dictframes = []

    # read frames
    if fxyz != 'none':
        fframes = read(fxyz, ':')
        nfframes = len(fframes)
        print("read xyz file:", fxyz, ", a total of", nfframes, "frames")
    # read frames in the dictionary
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        ndictframes = len(dictframes)
        print("read xyz file used for a dictionary:", dictxyz, ", a total of",
              ndictframes, "frames")

    # Dictionary frames come first, so they occupy the leading rows/columns
    # of the kernel matrix.
    frames = dictframes + fframes
    nframes = len(frames)
    global_species = []
    for frame in frames:
        global_species.extend(frame.get_atomic_numbers())
        if not soap_periodic:
            frame.set_pbc([False, False, False])
    global_species = np.unique(global_species)
    print("a total of", nframes, "frames, with elements: ", global_species)

    # set up the soap descriptors
    # With several frames the features are averaged per frame. If there is
    # only one frame we compute the kernel matrix (kmat) between the atomic
    # environments within this frame, so no averaging is applied.
    soap_desc = SOAP(species=global_species,
                     rcut=soap_rcut,
                     nmax=soap_n,
                     lmax=soap_l,
                     sigma=soap_g,
                     crossover=False,
                     average=nframes > 1,
                     periodic=soap_periodic)

    # compute soap finger prints
    fall = soap_desc.create(frames, n_jobs=8)

    # compute kmat
    # Each row of `fall` is wrapped as a one-environment structure so the
    # average kernel treats rows as separate entries.
    fshape = np.shape(fall)
    re = AverageKernel(metric="linear")

    kNN = re.create(fall.reshape((fshape[0], 1, fshape[1])))

    # save
    kmat_name = (prefix + "-n" + str(soap_n) + "-l" + str(soap_l) + "-c" +
                 str(soap_rcut) + "-g" + str(soap_g) + ".kmat")
    np.savetxt(kmat_name, kNN, fmt='%4.8f')

    # plot
    if matrix_plot:
        plt.matshow(kNN)
        plt.title('Kernel matrix: ' + prefix)
        plt.show()
Example #8
0
from dscribe.descriptors import SOAP
from dscribe.kernels import AverageKernel

from ase.build import molecule

# The features for atomic environments come from SOAP. The descriptor is
# configured once and reused for both molecules.
desc = SOAP(
    species=[1, 6, 7, 8],
    rcut=5.0,
    nmax=2,
    lmax=2,
    sigma=0.2,
    periodic=False,
    crossover=True,
    sparse=False,
)

# Two similar molecules will be compared.
a = molecule("H2O")
b = molecule("H2O2")
a_features, b_features = (desc.create(system) for system in (a, b))

# Similarity from an average kernel with a linear metric. The result is the
# full similarity matrix over both molecules.
re = AverageKernel(metric="linear")
re_kernel = re.create([a_features, b_features])

# Other metrics supported by scikit-learn can be used as well, for example a
# Gaussian (rbf). This overwrites the linear result above.
re = AverageKernel(metric="rbf", gamma=1)
re_kernel = re.create([a_features, b_features])
Example #9
0
                   rcut=r_cut,
                   nmax=nmax,
                   lmax=lmax,
                   periodic=True,
                   sparse=False)

# ---------------------------------------------------------------------
# Run SOAP across the n files in the list and output the comparison kernel
# ---------------------------------------------------------------------
# The timed region covers both descriptor creation and kernel evaluation.
tic_1 = time.perf_counter()
comparisons = [t2_per_soap.create(i) for i in structures]

metric = "linear"

re = AverageKernel(metric=metric)
kern = re.create(comparisons)

toc_1 = time.perf_counter()

comp_time = toc_1 - tic_1

# A bare ".2" precision means two significant digits: it switches to exponent
# notation for values of 10 or more and is rejected for ints. The elapsed time
# therefore uses fixed-point ".2f" and r_cut is printed as-is.
print(
    f"Took {comp_time:.2f} seconds to compare {ns} structures with r_cut = {r_cut}, lmax = {lmax}, nmax = {nmax}"
)

#---------------------------------------------------------------------
#OUTPUT COMPARISON AS CSV FILE
#---------------------------------------------------------------------
soap_array = pd.DataFrame(kern, index=names, columns=names)
soap_array.to_csv(outputdir + "/soap_comparison_rcut = %s.csv" % r_cut,
                  index=True,
allkerndiffs = []

# Sweep the radial basis size and record, for each nmax, the descriptor
# difference, the kernel values and the time taken by both kernels.
for nmax in range(1, 15):
    soapgen_rcut = SOAP(species=species,
                        rcut=rcut,
                        nmax=nmax,
                        lmax=lmax,
                        periodic=True,
                        sparse=False,
                        rbf='gto')
    descriptors = [soapgen_rcut.create(i) for i in structures]
    # Difference between the first feature of the first environment in the
    # second and in the first structure.
    descdiffs.append(descriptors[1][0][0] - descriptors[0][0][0])

    # Average kernel on the raw descriptors (timed).
    tic_1 = time.perf_counter()
    re = AverageKernel(metric='linear')
    kern = re.create(descriptors)
    toc_1 = time.perf_counter()
    ctime.append(toc_1 - tic_1)
    kerndiffs.append(kern[0][1])

    # REMatch kernel (timed, normalisation included).
    # NOTE(review): `normed` used to be computed here and then discarded,
    # with the raw descriptors passed to the kernel instead. The normalised
    # features are now the ones compared -- confirm this was the intent.
    tic_2 = time.perf_counter()
    normed = [normalize(i) for i in descriptors]
    rem = REMatchKernel(metric='rbf', gamma=1, alpha=1, threshold=1e-6)
    remkern = rem.create(normed)
    toc_2 = time.perf_counter()
    rectime.append(toc_2 - tic_2)
    remkerndiffs.append(remkern[0][1])

    # Absolute gap between the two kernels for the first pair of structures.
    allkerndiffs.append(abs(remkern[0][1] - kern[0][1]))

plt.plot(xax, ctime, label='Average Kernel')