Example #1
0
    def test_xy(self):
        """Checks that the kernel can be evaluated between two distinct sets
        of structures, as is needed when predicting with kernel-based
        methods.
        """
        # One descriptor is shared by all molecules
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        water_feat, oxygen_feat, peroxide_feat = (
            soap.create(molecule(name)) for name in ("H2O", "O2", "H2O2")
        )

        # Linear dot-product kernel between the sets {H2O, O2} and {H2O2}
        similarity = AverageKernel(metric="linear").create(
            [water_feat, oxygen_feat], [peroxide_feat]
        )

        # Two structures against one structure
        self.assertEqual(similarity.shape, (2, 1))
Example #2
0
def coefficients_gto(system, centers, args):
    """Numerically calculate the inner product coefficients of SOAP with the
    GTO radial basis.

    Args:
        system: Atomic structure that is passed on to ``SOAP.create`` and
            ``soap_integration``.
        centers: Positions at which the descriptor is evaluated.
        args (dict): Keyword arguments for the ``SOAP`` constructor. The keys
            "nmax" and "lmax" are read here directly.

    Returns:
        The value returned by ``soap_integration`` when it is given the GTO
        radial basis function defined below.
    """
    nmax = args["nmax"]
    lmax = args["lmax"]

    # Calculate the weights and decays of the radial basis functions. The
    # private attributes are read after create(); presumably that call is
    # what populates them -- confirm against the installed DScribe version.
    soap = SOAP(**args)
    soap.create(system, positions=centers)
    alphas = np.reshape(soap._alphas, [lmax + 1, nmax])
    betas = np.reshape(soap._betas, [lmax + 1, nmax, nmax])

    def rbf_gto(r, n, l):
        """Evaluate the radial basis function with indices (n, l) at r."""
        i_alpha = alphas[l, 0:nmax]
        i_beta = betas[l, n, 0:nmax]
        return (i_beta * r ** l * np.exp(-i_alpha * r ** 2)).sum()

    return soap_integration(system, centers, args, rbf_gto)
Example #3
0
def soap_gto_vs_polynomial(version):
    """Plot the SOAP creation time of the GTO and polynomial radial bases as
    a function of the number of atoms.
    """
    nmax = 4
    lmax = 4
    title = "SOAP nmax={}, lmax={}, version={}".format(nmax, lmax, version)

    fig = mpl.figure(figsize=[9, 7])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel("Number of atoms")
    ax.set_ylabel("Time (s)")

    for rbf in ["gto", "polynomial"]:
        sizes = []
        durations = []
        for ncells in tqdm(range(5, 15)):
            descriptor = SOAP(
                rcut=3.0,
                nmax=nmax,
                lmax=lmax,
                species=["Ni", "Ti"],
                rbf=rbf,
                crossover=True,
                periodic=True,
            )
            supercell = system_periodic.copy() * ncells

            # Only the descriptor creation itself is timed
            started = time()
            descriptor.create(supercell)
            finished = time()

            sizes.append(len(supercell))
            durations.append(finished - started)

        ax.plot(sizes, durations, "o--", label="{}".format(rbf))

    mpl.legend()
    mpl.show()
Example #4
0
    def test_periodic_images(self):
        """Checks which periodic images the descriptor takes into account."""
        settings = dict(species=[1, 6, 8], rcut=10.0, nmax=2, lmax=0, crossover=True)
        water = H2O.copy()

        # Reference: zero cell, created with a non-periodic descriptor
        water.set_cell([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
        finite = SOAP(periodic=False, **settings).create(
            water, positions=[[0, 0, 0]]
        )

        # From here on both the descriptor and the structure are periodic
        periodic_soap = SOAP(periodic=True, **settings)
        water.set_pbc(True)

        # Cubic cell, and the same cell repeated twice along the first axis
        water.set_cell([[3.0, 0.0, 0.0], [0.0, 3.0, 0.0], [0.0, 0.0, 3.0]])
        cubic_unit = periodic_soap.create(water, positions=[[0, 0, 0]])
        cubic_double = periodic_soap.create(
            water * (2, 1, 1), positions=[[0, 0, 0]]
        )

        # Triclinic cell, and the same cell repeated twice along the first axis
        water.set_cell([[0.0, 2.0, 2.0], [2.0, 0.0, 2.0], [2.0, 2.0, 0.0]])
        triclinic_unit = periodic_soap.create(water, positions=[[0, 0, 0]])
        triclinic_double = periodic_soap.create(
            water * (2, 1, 1), positions=[[0, 0, 0]]
        )

        # The finite system must differ from the periodic one ...
        finite_vs_periodic = np.sum(np.abs((finite[:3] - cubic_double[:3])))
        self.assertTrue(finite_vs_periodic > 0.1)

        # ... whereas repeating the cell must not change the output.
        # NOTE(review): these two sums are signed, so differences of opposite
        # sign could cancel out.
        self.assertAlmostEqual(np.sum(cubic_unit[:3] - cubic_double[:3]), 0)
        self.assertAlmostEqual(
            np.sum(triclinic_unit[:3] - triclinic_double[:3]), 0
        )
Example #5
0
    def test_xy(self):
        """Checks that the kernel can be evaluated between two distinct sets
        of structures, as is needed when predicting with kernel-based
        methods.
        """
        # One descriptor is shared by all molecules. The first four arguments
        # are deliberately left positional, exactly as they were.
        soap = SOAP([1, 8],
                    5.0,
                    2,
                    2,
                    sigma=0.2,
                    periodic=False,
                    crossover=True,
                    sparse=False)
        water_feat, oxygen_feat, peroxide_feat = (
            soap.create(molecule(name)) for name in ('H2O', 'O2', 'H2O2')
        )

        # Linear dot-product kernel between the sets {H2O, O2} and {H2O2}
        rematch = REMatchKernel(metric="linear", alpha=0.1, threshold=1e-6)
        similarity = rematch.create([water_feat, oxygen_feat], [peroxide_feat])

        # Two structures against one structure
        self.assertEqual(similarity.shape, (2, 1))
Example #6
0
    def test_difference(self):
        """Checks the kernel values for identical, unrelated and related
        pairs of molecules.
        """
        soap = SOAP(species=[1, 6, 7, 8],
                    rcut=5.0,
                    nmax=2,
                    lmax=2,
                    sigma=0.2,
                    periodic=False,
                    crossover=True,
                    sparse=False)
        kernel = AverageKernel(metric="linear")

        # Identical molecules: every entry of the kernel is one
        water_feat = soap.create(molecule("H2O"))
        same = kernel.create([water_feat, water_feat])
        self.assertTrue(np.all(np.abs(same - 1) < 1e-3))

        # N2 and H2O: the kernel is the identity matrix
        nitrogen = molecule("N2")
        water = molecule("H2O")
        unrelated = kernel.create([soap.create(nitrogen), soap.create(water)])
        self.assertTrue(np.all(np.abs(unrelated - np.eye(2)) < 1e-3))

        # H2O and H2O2: the off-diagonal similarity is high
        water = molecule("H2O")
        peroxide = molecule("H2O2")
        related = kernel.create([soap.create(water), soap.create(peroxide)])
        self.assertTrue(related[0, 1] > 0.9)
Example #7
0
    def test_multiple_species(self):
        """Checks that systems containing many different species are handled
        correctly.
        """
        nmax = 5
        lmax = 5
        species = list(range(1, 17))
        soap = SOAP(species=species,
                    rcut=5,
                    nmax=nmax,
                    lmax=lmax,
                    periodic=False,
                    sparse=False)

        # Eight positions along the diagonal (x = y = z)
        diagonal = np.linspace(0, 8, 8)[:, np.newaxis]
        coords = np.hstack((diagonal, diagonal, diagonal))

        def featurize(numbers):
            """Descriptor output for the given atomic numbers on the diagonal."""
            return soap.create(Atoms(symbols=numbers, positions=coords, pbc=False))

        low = featurize(species[0:8])
        high = featurize(species[8:])
        middle = featurize(species[4:12])

        low_high = np.dot(low[6, :], high[6, :])
        low_middle = np.dot(low[3, :], middle[3, :])
        high_middle = np.dot(high[3, :], middle[3, :])

        # Systems that share no species should have a zero dot product
        self.assertTrue(abs(low_high) <= 1e-8)

        # Systems that share some species should have an overlap in the dot
        # product
        self.assertTrue(abs(low_middle) > 1e-3)
        self.assertTrue(abs(high_middle) > 1e-3)
Example #8
0
    def test_average(self):
        """Tests that the averaged output equals the mean of the outputs
        created separately for each atom.
        """
        system = Atoms(symbols=["H", "C"], positions=[[-1, 0, 0], [1, 0, 0]], cell=[2, 2, 2], pbc=True)

        # Settings shared by the averaged and the non-averaged descriptor
        common = dict(
            atomic_numbers=[1, 6, 8],
            rcut=5,
            nmax=3,
            lmax=5,
            periodic=False,
            crossover=True,
            sparse=False,
        )

        # Create the average output
        desc = SOAP(average=True, **common)
        average = desc.create(system)[0, :]

        # Create individual output for both atoms
        desc = SOAP(average=False, **common)
        first = desc.create(system, positions=[0])[0, :]
        second = desc.create(system, positions=[1])[0, :]

        # Check that the averaging is done correctly. The comparison uses a
        # floating point tolerance: the library and this test may sum the
        # terms in a different order, so bitwise equality is not guaranteed.
        assumed_average = (first + second) / 2
        self.assertTrue(np.allclose(average, assumed_average))
Example #9
0
    def test_constructor(self):
        """Checks that invalid constructor arguments, and too dense radial
        basis sets, raise a ValueError.
        """
        # NOTE(review): the rcut and lmax cases below also pass sigma=0 and a
        # negative species, so they may not isolate the parameter they are
        # named after -- confirm which check actually raises.
        rejected_by_constructor = [
            # Invalid gaussian width
            dict(species=[-1, 2], rcut=5, sigma=0, nmax=5, lmax=5, periodic=True),
            dict(species=[-1, 2], rcut=5, sigma=-1, nmax=5, lmax=5, periodic=True),
            # Invalid rcut
            dict(species=[-1, 2], rcut=0.5, sigma=0, nmax=5, lmax=5, periodic=True),
            # Invalid lmax
            dict(species=[-1, 2], rcut=0.5, sigma=0, nmax=5, lmax=10, rbf="gto", periodic=True),
            # Invalid nmax
            dict(species=["H", "O"], rcut=4, sigma=1, nmax=0, lmax=8, rbf="gto", periodic=True),
        ]
        for setup in rejected_by_constructor:
            with self.assertRaises(ValueError):
                SOAP(**setup)

        # Too high radial basis set density, first polynomial and then gto.
        # Here the error may come from the constructor or from create().
        too_dense = [
            dict(species=["H", "O"], rcut=10, sigma=0.5, nmax=12, lmax=8, rbf="polynomial", periodic=False),
            dict(species=["H", "O"], rcut=10, sigma=0.5, nmax=20, lmax=8, rbf="gto", periodic=False),
        ]
        for setup in too_dense:
            with self.assertRaises(ValueError):
                SOAP(**setup).create(H2O)
Example #10
0
    def test_is_periodic(self):
        """Checks that a periodic descriptor produces a non-empty output for
        a periodic system.
        """
        settings = dict(species=[1, 6, 8], rcut=10.0, nmax=2, lmax=0, crossover=True)
        water = H2O.copy()

        # Non-periodic run. Its output takes no part in the assertion below;
        # the call is kept so that this code path is still exercised.
        finite_soap = SOAP(periodic=False, **settings)
        water.set_pbc(False)
        finite_soap.create(water, positions=[[0, 0, 0]])

        # Periodic run in a cubic cell
        water.set_pbc(True)
        water.set_cell([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])
        periodic_soap = SOAP(periodic=True, **settings)
        in_cubic_cell = periodic_soap.create(water, positions=[[0, 0, 0]])

        self.assertTrue(np.sum(in_cubic_cell) > 0)
Example #11
0
    def test_basis(self):
        """Checks that the output vectors behave correctly as a basis."""
        off_axis = [[1, 0, 0], [0, 1, 0]]
        on_axis = [[-1, 0, 0], [1, 0, 0]]

        def build(symbols, positions):
            """Two-atom periodic system in a cell given as [2, 2, 2]."""
            return Atoms(symbols=symbols, positions=positions, cell=[2, 2, 2], pbc=True)

        systems = {
            "HH": build(["H", "H"], off_axis),
            "OO": build(["O", "O"], off_axis),
            "CC": build(["C", "C"], off_axis),
            "HC_on_axis": build(["H", "C"], on_axis),
            "HC": build(["H", "C"], off_axis),
            "HO": build(["H", "O"], off_axis),
            "CO": build(["C", "O"], off_axis),
        }

        soap = SOAP(
            atomic_numbers=[1, 6, 8],
            rcut=5,
            nmax=3,
            lmax=5,
            periodic=False,
            crossover=True,
            sparse=False
        )

        # One output vector per system, all centered at the origin
        vec = {
            label: soap.create(system, positions=[[0, 0, 0]])[0, :]
            for label, system in systems.items()
        }

        # The dot product is zero when no elements are shared
        self.assertEqual(np.dot(vec["HH"], vec["OO"]), 0)
        self.assertEqual(np.dot(vec["OO"], vec["CC"]), 0)

        # The dot product is non-zero when elements are shared
        self.assertNotEqual(np.dot(vec["HC_on_axis"], vec["HC"]), 0)

        # Self-terms: the first block is filled only when H is present
        width = soap.get_number_of_element_features()
        first_block = slice(0, width)
        self.assertNotEqual(np.sum(vec["HH"][first_block]), 0)
        self.assertEqual(np.sum(vec["OO"][first_block]), 0)
        self.assertNotEqual(np.sum(vec["HC_on_axis"][first_block]), 0)

        # Cross terms: the second block is checked for H-C and the fifth block
        # for C-O
        second_block = slice(1 * width, 2 * width)
        fifth_block = slice(4 * width, 5 * width)
        self.assertEqual(np.sum(vec["HH"][second_block]), 0)
        self.assertNotEqual(np.sum(vec["HC_on_axis"][second_block]), 0)
        self.assertEqual(np.sum(vec["HO"][fifth_block]), 0)
        self.assertNotEqual(np.sum(vec["CO"][fifth_block]), 0)
Example #12
0
    def test_crossover(self):
        """Tests that disabling/enabling crossover works as expected.

        For both radial basis functions the H-H and O-O parts of the output
        must be identical with and without crossover, although the location
        of the O-O part within the output changes.
        """
        pos = [[0.1, 0.1, 0.1]]
        species = [1, 8]
        nmax = 5
        lmax = 5

        # The same checks are run for both radial bases. The polynomial case
        # previously passed nmax=lmax, which only worked because both values
        # happen to be 5.
        for rbf in ("gto", "polynomial"):
            with self.subTest(rbf=rbf):
                desc = SOAP(
                    species=species,
                    rbf=rbf,
                    crossover=True,
                    rcut=3,
                    nmax=nmax,
                    lmax=lmax,
                    periodic=False,
                )

                # Locations and output with crossover enabled
                hh_loc_full = desc.get_location(("H", "H"))
                oo_loc_full = desc.get_location(("O", "O"))
                full_output = desc.create(H2O, positions=pos)

                # Locations and output with crossover disabled
                desc.crossover = False
                hh_loc = desc.get_location(("H", "H"))
                oo_loc = desc.get_location(("O", "O"))
                partial_output = desc.create(H2O, positions=pos)

                self.assertTrue(oo_loc_full != oo_loc)
                self.assertTrue(
                    np.array_equal(
                        full_output[:, hh_loc_full], partial_output[:, hh_loc]
                    )
                )
                self.assertTrue(
                    np.array_equal(
                        full_output[:, oo_loc_full], partial_output[:, oo_loc]
                    )
                )
Example #13
0
def test_soap(version):
    """Tests how the SOAP descriptor calculation scales with system size.

    For both radial basis functions the creation time is measured for NiTi
    supercells of increasing size, and the result is saved as a plot to
    "soap_scaling_<version>.pdf".

    Args:
        version: Label that is shown in the plot title and used in the name
            of the output file.
    """
    nmax = 4
    lmax = 4
    fig = mpl.figure(figsize=[9, 7])
    ax = fig.add_subplot(111)
    ax.set_title("SOAP nmax={}, lmax={}, version={}".format(
        nmax, lmax, version))
    ax.set_xlabel("Number of atoms")
    ax.set_ylabel("Time (s)")

    # The two-atom unit cell is the same for every measurement, so it is
    # built only once. Multiplying an Atoms object returns a new object.
    a = 2.993
    unit_cell = Atoms(
        "NiTi",
        positions=[[0.0, 0.0, 0.0], [a / 2, a / 2, a / 2]],
        cell=[a, a, a],
        pbc=[1, 1, 1],
    )

    for rbf in ["gto", "polynomial"]:

        N = []
        t = []
        # Loop over different system sizes
        for ncells in tqdm(range(5, 20)):

            soap_generator = SOAP(rcut=3.0,
                                  nmax=nmax,
                                  lmax=lmax,
                                  species=["Ni", "Ti"],
                                  rbf=rbf,
                                  crossover=True,
                                  periodic=True)

            # Replicate system
            niti = unit_cell * ncells

            # Only the descriptor creation itself is timed
            t0 = time()
            soap_generator.create(niti)
            t1 = time()

            # The atom count is read from the system instead of being
            # recomputed by hand, so the two cannot drift apart.
            N.append(len(niti))
            t.append(t1 - t0)

        N = np.array(N)
        t = np.array(t)

        ax.plot(N, t, "o--", label="{}".format(rbf))

    mpl.legend()
    mpl.savefig("soap_scaling_{}.pdf".format(version))
Example #14
0
    def test_sparse(self):
        """Tests the sparse matrix creation.

        The ``sparse`` constructor argument decides whether ``create``
        returns a numpy array or a ``sparse.COO`` array.
        """
        # Dense
        desc = SOAP(species=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=False)
        vec = desc.create(H2O)
        # assertIsInstance reports the actual type when the check fails
        self.assertIsInstance(vec, np.ndarray)

        # Sparse
        desc = SOAP(species=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=True)
        vec = desc.create(H2O)
        self.assertIsInstance(vec, sparse.COO)
Example #15
0
    def test_sparse(self):
        """Tests the sparse matrix creation.

        The ``sparse`` constructor argument decides whether ``create``
        returns a numpy array or a ``scipy.sparse.coo_matrix``.
        """
        # Dense
        desc = SOAP(atomic_numbers=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=False)
        vec = desc.create(H2O)
        # assertIsInstance reports the actual type when the check fails
        self.assertIsInstance(vec, np.ndarray)

        # Sparse
        desc = SOAP(atomic_numbers=[1, 8], rcut=5, nmax=5, lmax=5, periodic=True, sparse=True)
        vec = desc.create(H2O)
        self.assertIsInstance(vec, scipy.sparse.coo_matrix)
Example #16
0
    def _featurize(self, struct: PymatgenStructure) -> np.ndarray:
        """Compute the SOAP descriptor of a pymatgen structure.

        Parameters
        ----------
        struct: pymatgen.Structure
            A periodic crystal composed of a lattice and a sequence of atomic
            sites with 3D coordinates and elements. It is converted with
            ``AseAtomsAdaptor`` before featurization when ``self.convert`` is
            truthy, and passed on unchanged otherwise.

        Returns
        -------
        features: np.ndarray
            The SOAP descriptor as returned by ``np.asarray``.
        """
        # The descriptor is configured entirely from the featurizer attributes
        descriptor = SOAP(
            periodic=self.periodic,
            species=self.species,
            rcut=self.rcut,
            nmax=self.nmax,
            lmax=self.lmax,
            rbf=self.rbf,
            sigma=self.sigma,
            average=self.average,
        )

        target = AseAtomsAdaptor().get_atoms(struct) if self.convert else struct
        return np.asarray(descriptor.create(target))
Example #17
0
    def test_metrics(self):
        """Checks that the kernel can be created with different metrics as
        defined by scikit-learn.
        """
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        water_features = soap.create(molecule("H2O"))

        # Linear dot-product, Gaussian and Laplacian kernels, in that order.
        # This is a smoke test: only the absence of exceptions is checked.
        metric_setups = (
            dict(metric="linear"),
            dict(metric="rbf", gamma=1),
            dict(metric="laplacian", gamma=1),
        )
        for setup in metric_setups:
            AverageKernel(**setup).create([water_features, water_features])
Example #18
0
    def test_convergence_infinity(self):
        """Checks that the REMatch kernel converges to the average kernel at
        the limit of infinite alpha.
        """
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        water = molecule("H2O")
        peroxide = molecule("H2O2")
        features = [soap.create(water), soap.create(peroxide)]

        # REMatch kernel with a very high alpha
        rematch = REMatchKernel(metric="linear", alpha=1e20, threshold=1e-6)
        from_rematch = rematch.create(features)

        # Average kernel on the same features
        from_average = AverageKernel(metric="linear").create(features)

        # The two kernels should agree approximately
        self.assertTrue(np.allclose(from_rematch, from_average))
Example #19
0
def create_soap(nmax, lmax, rcut, procs):
    """Create SOAP descriptors for the module-level ``struc`` at the
    positions ``grid``, report the timings and pickle the result.

    The output file name is built from the module-level ``path`` and the four
    arguments.
    """
    descriptor = SOAP(
        species=['Au'],
        periodic=False,
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
    )

    # Descriptor creation
    print('Calculating SOAPs on', procs, 'procs')
    print('nmax, lmax, rcut =', nmax, lmax, rcut)
    began = time.time()
    soap_struc = descriptor.create(struc, grid, n_jobs=procs)
    print('DONE in ', time.time() - began, '\n')
    print('Shape of SOAPs:', soap_struc.shape)

    # Serialization
    print('Writing SOAPs to disk...')
    began = time.time()
    target = (path + 'soap_n_' + str(nmax) + '_l_' + str(lmax) + '_r_' +
              str(rcut) + '_p_' + str(procs))
    write_pickle(soap_struc, target)
    print('DONE in ', time.time() - began, '\n')
    print('ALL DONE')
Example #20
0
    def test_get_location_w_crossover(self):
        """Checks that, with crossover enabled, the slices returned by
        get_location point to parts of the output that are filled or empty
        according to the elements present in the system.
        """
        soap = SOAP(species=["H", "O", "C"],
                    rbf="gto",
                    crossover=True,
                    rcut=3,
                    nmax=5,
                    lmax=5,
                    periodic=False)

        # Locations requested with chemical symbols
        pairs = [
            ("H", "H"),
            ("H", "O"),
            ("O", "H"),
            ("O", "O"),
            ("C", "C"),
            ("C", "O"),
            ("C", "H"),
        ]
        loc = {pair: soap.get_location(pair) for pair in pairs}

        # Elements that were not given in the constructor are rejected
        for undefined in [(2, 1), ("He", "H")]:
            with self.assertRaises(ValueError):
                soap.get_location(undefined)

        co2_out = soap.create(molecule("CO2"))
        h2o_out = soap.create(molecule("H2O"))

        # The order of the elements in a pair does not matter
        self.assertTrue(loc[("H", "O")] == loc[("O", "H")])

        # (pair, filled in CO2, filled in H2O)
        expectations = [
            (("H", "H"), False, True),
            (("C", "H"), False, False),
            (("H", "O"), False, True),
            (("C", "O"), True, False),
            (("C", "C"), True, False),
            (("O", "O"), True, True),
        ]
        for pair, filled_in_co2, filled_in_h2o in expectations:
            co2_total = co2_out[:, loc[pair]].sum()
            h2o_total = h2o_out[:, loc[pair]].sum()
            self.assertTrue((co2_total != 0) == filled_in_co2)
            self.assertTrue((h2o_total != 0) == filled_in_h2o)
Example #21
0
class SOAPConverter(BaseEstimator, TransformerMixin):
    """Compute the SOAP descriptors for molecules"""
    def __init__(self,
                 rcut: float = 6,
                 nmax: int = 8,
                 lmax: int = 6,
                 species=frozenset({'C', 'O', 'H', 'N', 'F'})):
        """Initialize the converter

        Args:
            rcut (float): Cutoff radius
            nmax (int): Forwarded to ``SOAP`` as ``nmax``
            lmax (int): Forwarded to ``SOAP`` as ``lmax``
            species (Iterable): List of elements to include in potential
        """
        super().__init__()

        # scikit-learn reads every constructor argument back from an
        # attribute of the same name (get_params, clone, repr). They are
        # therefore stored unmodified.
        self.rcut = rcut
        self.nmax = nmax
        self.lmax = lmax
        self.species = species

        # NOTE: the descriptor is built once here, so changing a parameter
        # later through set_params() does not rebuild it.
        self.soap = SOAP(rcut=rcut,
                         nmax=nmax,
                         lmax=lmax,
                         species=sorted(species))

    def fit(self, X, y=None):
        """Do nothing and return the converter itself; there is no state
        to fit.
        """
        return self

    def transform(self, X, y=None):
        """Return a list with the SOAP output of every entry in ``X``."""
        return [self.soap.create(x) for x in X]
Example #22
0
def getSOAPs(geometries, species,
             rcut, sigma, nmax=10, lmax=9,
             periodic=True, crossover=True, sparse=False, n_jobs=4):
    """
    Takes a Series of geometries with one He present,
        returns SOAP representation of the chemical environment of He for each item
    Assumes any given structure in ``geometries`` has the same collection of elements
        as all the other structures
    Assumes any given structure in ``geometries`` has the same number of atoms as all
        the other structures
    Assumes the He atom is the last atom of every structure

    Input:
        geometries: Series of Atoms objects; each must contain exactly 1 He atom
        species: species passed on to the SOAP descriptor
        rcut, nmax, lmax, sigma, periodic, crossover, sparse: SOAP parameters
        n_jobs: number of parallel jobs passed to ``SOAP.create`` (default 4,
            the value that was previously hard-coded)
    Output:
        output: Series of SOAP matrices, each corresponding to the appropriate index
    """
    ## set up descriptor
    desc = SOAP(species=species, rcut=rcut, nmax=nmax, lmax=lmax,
                sigma=sigma, periodic=periodic, crossover=crossover, sparse=sparse)

    ## apply descriptor
    soaps = {}
    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0.
    for i, geom in geometries.items():
        HeLoc = len(geom) - 1  # assume He atom is last one in Atoms list
        # Normalized SOAP representation of the environment of the He atom
        tempSOAP = preprocessing.normalize(
            desc.create(geom, positions=[HeLoc], n_jobs=n_jobs))
        soaps[i] = tempSOAP[0]
    return pd.Series(soaps, name='SOAP')
Example #23
0
def createDescriptorsAllSOAP(data,
                             species,
                             sigma_SOAP,
                             cutoff_SOAP,
                             nmax_SOAP,
                             lmax_SOAP,
                             periodic,
                             sparse_SOAP=default_sparse_SOAP):
    """Compute the SOAP descriptors of every structure in ``data`` and
    return them grouped by atom.

    The result is a list with one entry per atom; each entry is an object
    array of shape (number of structures, number of features). The number of
    atoms is taken from the first structure.
    """
    # Descriptor shared by all structures
    descriptor = SOAP(species=species,
                      sigma=sigma_SOAP,
                      periodic=periodic,
                      rcut=cutoff_SOAP,
                      nmax=nmax_SOAP,
                      lmax=lmax_SOAP,
                      sparse=sparse_SOAP)

    # Dimensions of the output
    n_features = descriptor.get_number_of_features()
    n_atoms = np.shape(data[0])[0]
    n_steps = len(data)

    # Fill one (atoms x features) slab per structure
    per_atom = np.empty((n_atoms, n_steps, n_features), dtype=object)
    for step in tqdm.tqdm(range(n_steps)):
        per_atom[:, step, :] = descriptor.create(data[step])

    # Regroup into one (structures x features) array per atom
    return [per_atom[atom, :, :] for atom in range(n_atoms)]
Example #24
0
    def test_poly_integration(self):
        """Checks that the partial power spectrum with the polynomial basis
        done with C corresponds to the easier-to-code but less performant
        integration done with python.
        """
        system, centers, args = get_soap_polynomial_lmax_setup()

        # Mostly analytical power spectrum (the radial part is integrated
        # numerically), as created by the descriptor itself
        descriptor = SOAP(**args, rbf="polynomial", dtype="float64")
        analytical = descriptor.create(system, positions=centers)

        # Numerical power spectrum built from the loaded coefficients
        numerical = self.get_power_spectrum(
            load_polynomial_coefficients(args), crossover=args["crossover"]
        )

        agrees = np.allclose(numerical, analytical, atol=1e-15, rtol=0.01)
        self.assertTrue(agrees)
Example #25
0
def extract_descriptor(rows):
    """Build a SOAP feature vector and a target for every database row.

    In each structure the Au atoms are replaced by Fe. The descriptor is
    created at the indices of the replaced atoms and averaged over them. The
    target is ``row.data[predict_item]`` wrapped in a list.

    Returns:
        A tuple (soaps, targets) of two lists with one entry per row.
    """
    soaps = []
    targets = []
    for row in rows:
        original = row.toatoms()

        # Empty copy that shares the cell and periodicity of the original
        atoms_all_Fe = Atoms()
        atoms_all_Fe.set_cell(original.get_cell())
        atoms_all_Fe.set_pbc(original.get_pbc())

        # Copy the atoms over, swapping Au for Fe and remembering where
        gold_indices = []
        for idx, at in enumerate(original):
            is_gold = at.symbol == 'Au'
            if is_gold:
                gold_indices.append(idx)
            atoms_all_Fe.append(Atom('Fe' if is_gold else at.symbol, at.position))

        species = list({at.symbol for at in atoms_all_Fe})

        # NOTE(review): lmax is set from the module-level nmax; confirm that
        # this is intended.
        periodic_soap = SOAP(
            species=species,
            rcut=rcut,
            nmax=nmax,
            lmax=nmax,
            periodic=True,
            sparse=False)
        at_gold_sites = periodic_soap.create(atoms_all_Fe, positions=gold_indices)

        soaps.append(np.mean(at_gold_sites, axis=0))
        targets.append([row.data[predict_item]])
    return soaps, targets
Example #26
0
    def test_metrics(self):
        """Checks that the kernel can be created with different metrics as
        defined by scikit-learn.
        """
        # The first four arguments are deliberately left positional, exactly
        # as they were.
        soap = SOAP([1, 8],
                    5.0,
                    2,
                    2,
                    sigma=0.2,
                    periodic=False,
                    crossover=True,
                    sparse=False)
        water_features = soap.create(molecule('H2O'))

        # Linear dot-product, Gaussian and Laplacian kernels, in that order.
        # This is a smoke test: only the absence of exceptions is checked.
        metric_setups = (
            dict(metric="linear", alpha=0.1, threshold=1e-6),
            dict(metric="rbf", gamma=1, alpha=0.1, threshold=1e-6),
            dict(metric="laplacian", gamma=1, alpha=0.1, threshold=1e-6),
        )
        for setup in metric_setups:
            REMatchKernel(**setup).create([water_features, water_features])
Example #27
0
        def update_soap_analysis(struct, all_kwargs):
            """Callback that builds the SOAP graph for the given structure.

            Raises PreventUpdate when ``struct`` is falsy. The settings are
            reconstructed from the callback context; ``all_kwargs`` itself is
            not read.
            """
            if not struct:
                raise PreventUpdate

            structure = self.from_data(struct)
            settings = self.reconstruct_kwargs_from_state(
                callback_context.inputs)

            # TODO: make sure is_int kwarg information is enforced so that int() conversion is unnecessary
            atomic_numbers = [e.number for e in structure.composition.elements]
            descriptor = SOAP(
                species=atomic_numbers,
                sigma=settings["sigma"],
                rcut=settings["rcut"],
                nmax=int(settings["nmax"]),
                lmax=int(settings["lmax"]),
                periodic=True,
                crossover=settings["crossover"],
                sparse=False,
                average=settings["average"],
            )

            # The descriptor is created from the structure converted with
            # AseAtomsAdaptor, and normalized before plotting
            atoms = AseAtomsAdaptor().get_atoms(structure)
            raw_feature = descriptor.create(atoms, n_jobs=cpu_count())
            feature = normalize(raw_feature)

            return _get_soap_graph(feature, "SOAP vector for this material")
Example #28
0
class Atomic_Descriptor_SOAP(Atomic_Descriptor_Base):
    """Atomic descriptor backed by a DScribe ``SOAP`` object."""

    def __init__(self, desc_spec):
        """
        make a DScribe SOAP object

        Parameters
        ----------
        desc_spec: dict
            Descriptor specification. ``desc_spec["type"]`` must be ``"SOAP"``.
            Required keys: ``species``, ``cutoff``, ``atom_gaussian_width``,
            ``n`` and ``l``.
            Optional keys: ``rbf`` (default ``'gto'``), ``crossover``
            (default ``False``) and ``periodic`` (default ``True``); the
            latter two are coerced with ``bool()``.

        Raises
        ------
        ValueError
            If the type is missing or not ``"SOAP"``, or if any required key
            is absent.
        """

        from dscribe.descriptors import SOAP

        if desc_spec.get("type") != "SOAP":
            raise ValueError(
                "Type is not SOAP or cannot find the type of the descriptor")

        # required
        # Only a missing key is translated into ValueError; anything else
        # (e.g. KeyboardInterrupt) must propagate untouched.
        try:
            self.species = desc_spec['species']
            self.cutoff = desc_spec['cutoff']
            self.g = desc_spec['atom_gaussian_width']
            self.n = desc_spec['n']
            self.l = desc_spec['l']
        except KeyError as err:
            raise ValueError(
                "Not enough information to intialize the `Atomic_Descriptor_SOAP` object"
            ) from err

        # we have defaults here
        self.rbf = desc_spec.get('rbf', 'gto')
        self.crossover = bool(desc_spec.get('crossover', False))
        self.periodic = bool(desc_spec.get('periodic', True))

        self.soap = SOAP(species=self.species,
                         rcut=self.cutoff,
                         nmax=self.n,
                         lmax=self.l,
                         sigma=self.g,
                         rbf=self.rbf,
                         crossover=self.crossover,
                         average='off',
                         periodic=self.periodic)

        print("Using SOAP Descriptors ...")

        # make an acronym
        self.acronym = "SOAP-n{}-l{}-c{}-g{}".format(
            self.n, self.l, self.cutoff, self.g)

    def create(self, frame):
        """Return ``(acronym, descriptor)`` for ``frame``.

        The descriptor is whatever ``self.soap.create(frame, n_jobs=1)``
        returns.
        """
        # notice that we return the acronym here!!!
        return self.acronym, self.soap.create(frame, n_jobs=1)
Example #29
0
def calc_soap_dscribe(atoms,
                      parameters,
                      atomic_numbers=None,
                      periodic=True,
                      sparse=False,
                      rbf="polynomial"):
    """
    Build a DScribe SOAP descriptor for a structure and evaluate it.

    Args
    ----
    atoms:
        Structure to describe. It is handed to ``SOAP.create`` and, when
        ``atomic_numbers`` is None, queried via ``get_atomic_numbers()``.

    parameters: list
        SOAP parameters (r, sigma, n, l, zeta); exactly five values.

        r: radial cut-off.
        sigma: broadness of the Gaussian functions (smoothness).
        n: order to which the radial basis set is expanded to.
        l: order to which the angular basis set is expanded to.
        zeta: power to which the normalised SOAP kernel is raised. It is
            unpacked but not used by this function.

    atomic_numbers: list
        Atomic numbers to include in the SOAP analysis. All elements that will
        be encountered need to be included. If None, will just include all
        elements in the structure. Note, undesirable behaviour may occur if
        comparing structures with different species if not all elements are
        included for both structures.

    periodic: bool
        Whether to construct a periodic SOAP.

    sparse:
        Forwarded unchanged as the ``sparse`` argument of the SOAP
        constructor.

    rbf: str
        Radial basis function to use ("polynomial" or DScribe's custom "gto"
        basis set).

    Returns
    -------
    The result of calling ``create(atoms)`` on the constructed descriptor.
    """

    from dscribe.descriptors import SOAP

    # Fall back to every element present in the structure.
    species = (atoms.get_atomic_numbers()
               if atomic_numbers is None else atomic_numbers)

    # zeta belongs to the parameter tuple but is not needed to build the
    # descriptor itself.
    rcut, width, n_radial, l_angular, _zeta = parameters

    descriptor = SOAP(
        species=np.unique(species),
        rcut=rcut,
        sigma=width,
        nmax=n_radial,
        lmax=l_angular,
        periodic=periodic,
        sparse=sparse,
        rbf=rbf,
    )

    return descriptor.create(atoms)
Example #30
0
    def test_is_periodic(self):
        """Tests whether periodic images are seen by the descriptor.

        Creates one descriptor with periodic=False and one with periodic=True,
        evaluates each at the position [0, 0, 0], and asserts that the output
        of the periodic one sums to a positive value.
        """
        # Positional arguments are presumably species, rcut, nmax, lmax --
        # TODO confirm against the SOAP constructor signature.
        desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=False, crossover=True,)

        # H2O is defined outside this method and is modified in place below.
        H2O.set_pbc(False)
        # NOTE(review): `nocell` is not used by the assertion visible here.
        nocell = desc.create(H2O, positions=[[0, 0, 0]])

        # Switch the same system to periodic boundaries with a cubic cell of
        # side 2.0.
        H2O.set_pbc(True)
        H2O.set_cell([
            [2.0, 0.0, 0.0],
            [0.0, 2.0, 0.0],
            [0.0, 0.0, 2.0]
        ])
        desc = SOAP([1, 6, 8], 10.0, 2, 0, periodic=True, crossover=True,)

        cubic_cell = desc.create(H2O, positions=[[0, 0, 0]])

        # Only the sign of the summed periodic output is checked.
        self.assertTrue(np.sum(cubic_cell) > 0)