Example #1
0
    def __init__(self, desc_spec):
        """
        Make a DScribe Coulomb-matrix (CM) object from a specification dict.

        Parameters
        ----------
        desc_spec: dict. Must contain ``"type": "CM"`` and ``"max_atoms"``.
                   An optional ``"periodic"`` entry must not be equal to True.

        Raises
        ------
        ValueError: if the type is missing or is not "CM", if ``max_atoms``
                    is missing, or if a periodic system is requested.
        """

        # Imported inside the method, as in the original code.
        from dscribe.descriptors import CoulombMatrix

        if "type" not in desc_spec or desc_spec["type"] != "CM":
            raise ValueError(
                "Type is not CM or cannot find the type of the descriptor")

        # required
        try:
            self.max_atoms = desc_spec['max_atoms']
        except KeyError as err:
            # Only a missing key is translated; anything else (including
            # KeyboardInterrupt/SystemExit) is no longer swallowed.
            raise ValueError(
                "Not enough information to intialize the `Atomic_Descriptor_CM` object"
            ) from err

        # Equality (not truthiness) is kept on purpose so that only values
        # equal to True are rejected.
        if 'periodic' in desc_spec and desc_spec['periodic'] == True:
            raise ValueError(
                "Coulomb Matrix cannot be used for periodic systems")

        self.cm = CoulombMatrix(self.max_atoms)
        print("Using CoulombMatrix ...")
        # make an acronym
        self.acronym = "CM" + "-" + str(self.max_atoms)
Example #2
0
    def test_features(self):
        """Checks the unsorted Coulomb matrix of H2O element by element.
        """
        descriptor = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=False)
        matrix = descriptor.create(H2O)

        # Reference values: 0.5 * Z_i**2.4 on the diagonal and
        # Z_i * Z_j / |R_i - R_j| off the diagonal, written into the top-left
        # 3 x 3 corner of a zero-filled 5 x 5 array.
        charges = H2O.get_atomic_numbers()
        coords = H2O.get_positions()
        expected = np.zeros((5, 5))
        for i in range(3):
            for j in range(3):
                if i == j:
                    expected[i, j] = 0.5 * charges[i]**2.4
                else:
                    expected[i, j] = charges[i] * charges[j] / (
                        np.linalg.norm(coords[i] - coords[j]))

        self.assertTrue(np.array_equal(matrix, expected))
Example #3
0
 def test_constructor(self):
     """Checks that invalid constructor arguments raise ValueError.
     """
     invalid_kwargs = (
         {"n_atoms_max": 5, "permutation": "unknown"},
         {"n_atoms_max": -1},
     )
     for kwargs in invalid_kwargs:
         with self.assertRaises(ValueError):
             CoulombMatrix(**kwargs)
Example #4
0
 def test_number_of_features(self):
     """Checks the feature count reported for n_atoms_max=5.
     """
     settings = dict(n_atoms_max=5, permutation="sorted_l2", flatten=False)
     descriptor = CoulombMatrix(**settings)
     self.assertEqual(descriptor.get_number_of_features(), 25)
Example #5
0
    def test_features(self):
        """Checks that the row norms of the sorted matrix never increase."""
        descriptor = CoulombMatrix(n_atoms_max=5, permutation="sorted_l2", flatten=False)
        row_norms = np.linalg.norm(descriptor.create(H2O), axis=1)

        # Compare each row norm with the one directly before it.
        for previous, current in zip(row_norms, row_norms[1:]):
            self.assertTrue(current <= previous)
Example #6
0
 def test_constructor(self):
     """Checks that inconsistent permutation/sigma pairs raise ValueError.
     """
     # (permutation, sigma) pairs the constructor is expected to reject.
     rejected = (
         ("random", None),
         ("sorted_l2", 3),
         ("none", 3),
         ("eigenspectrum", 3),
     )
     for permutation, sigma in rejected:
         with self.assertRaises(ValueError):
             CoulombMatrix(n_atoms_max=5, permutation=permutation, sigma=sigma)
Example #7
0
 def test_exceptions(self):
     """Checks that each invalid permutation/sigma pair raises ValueError.
     """
     # (permutation, sigma) pairs the constructor is expected to reject.
     rejected = (
         ("random", None),
         ("sorted_l2", 3),
         ("none", 3),
         ("eigenspectrum", 3),
     )
     for permutation, sigma in rejected:
         with self.assertRaises(ValueError):
             CoulombMatrix(n_atoms_max=5, permutation=permutation, sigma=sigma)
Example #8
0
    def test_norm_vector(self):
        """Checks that ``_norm_vector`` is written and that a second call to
        ``create`` still returns an output of length 5.
        """
        descriptor = CoulombMatrix(
            n_atoms_max=5, permutation="random", sigma=100, flatten=False
        )
        first_result = descriptor.create(H2O)
        self.assertEqual(len(first_result), 5)

        # The norm_vector is not zero padded in this implementation. All
        # zero-padding is done at the end after randomly sorting
        norm_vector = descriptor._norm_vector
        self.assertEqual(len(norm_vector), 3)

        second_result = descriptor.create(H2O)
        self.assertEqual(len(second_result), 5)
Example #9
0
    def __init__(self,
                 preprocessor=None,
                 batch_size=None,
                 filename="features.db",
                 scheduler="distributed",
                 save_preprocessor="ml4chem",
                 overwrite=True,
                 **kwargs):
        """Initialise the wrapper around the DScribe Coulomb matrix.

        The named arguments are stored on the instance as attributes of the
        same name. ``**kwargs`` is forwarded unchanged to
        ``CoulombMatrixDscribe.__init__``, which is always called with
        ``permutation="none"`` and ``flatten=False``.

        ``self.params`` is an ``OrderedDict`` holding ``"name"`` (from
        ``self.name()``) plus every remaining local variable that is not
        listed in ``delete`` and is not ``None``.
        """
        super(CoulombMatrix, self).__init__()

        # The DScribe initialiser is called explicitly, in addition to the
        # super().__init__() call above.
        CoulombMatrixDscribe.__init__(self,
                                      permutation="none",
                                      flatten=False,
                                      **kwargs)

        self.batch_size = batch_size
        self.filename = filename
        self.preprocessor = preprocessor
        self.scheduler = scheduler
        self.overwrite = overwrite
        self.save_preprocessor = save_preprocessor

        # Let's add parameters that are going to be stored in the .params json
        # file.
        self.params = OrderedDict()
        self.params["name"] = self.name()

        # This is a very general way of not forgetting to save variables
        # NOTE(review): vars() with no argument returns the local namespace.
        # What it contains depends on where this line sits, so do not move it
        # or add locals above it without checking what ends up in self.params.
        _params = vars()

        # Delete useless variables
        # Names that do not occur in the snapshot are tolerated by the loop
        # below (see the KeyError handler).
        delete = [
            "self",
            "scheduler",
            "overwrite",
            "k",
            "v",
            "value",
            "keys",
            "batch_size",
            "__class__",
        ]

        for param in delete:
            try:
                del _params[param]
            except KeyError:
                # In case the variable does not exist we just pass.
                pass

        # Only values that are not None are recorded.
        for k, v in _params.items():
            if v is not None:
                self.params[k] = v
Example #10
0
    def test_features(self):
        """Checks the shape, ordering and zero-padding of the eigenspectrum."""
        descriptor = CoulombMatrix(n_atoms_max=5, permutation="eigenspectrum")
        spectrum = descriptor.create(H2O)

        self.assertEqual(spectrum.shape, (5,))

        n_atoms = len(H2O)

        # Absolute values of the first n_atoms entries must not increase
        previous = float("Inf")
        for current in spectrum[:n_atoms]:
            self.assertTrue(abs(current) <= abs(previous))
            previous = current

        # Everything after the first n_atoms entries must be zero
        padding = spectrum[n_atoms:]
        self.assertTrue(np.array_equal(padding, [0, 0]))
Example #11
0
    def test_periodicity(self):
        """Checks that the off-diagonal element for a two-hydrogen system
        flagged as periodic equals 1 * 1 / |R_0 - R_1| computed from the
        plain positions.
        """
        fractional = [
            [0.1, 0, 0],
            [0.9, 0, 0],
        ]
        hydrogen_pair = Atoms(
            cell=[5, 5, 5],
            scaled_positions=fractional,
            symbols=["H", "H"],
            pbc=True,
        )
        descriptor = CoulombMatrix(n_atoms_max=5, permutation="none", flatten=False)
        matrix = descriptor.create(hydrogen_pair)

        cartesian = hydrogen_pair.get_positions()
        separation = np.linalg.norm((cartesian[0] - cartesian[1]))
        expected = 1 * 1 / separation
        self.assertEqual(matrix[0, 1], expected)
Example #12
0
def ML_potential(config, data):
    """Predict an energy for ``config`` with the model stored in ``data``.

    ``data['metadata'][3]['best_model_fitted']`` supplies the model and
    ``data['metadata'][1]['descriptor_type']`` selects the featurisation:
    ``'Coulomb_matrix'`` builds a flattened, ``sorted_l2`` CoulombMatrix with
    ``n_atoms_max=7``; ``'PIV'`` calls the callable stored under
    ``data['metadata'][1]['descriptor']``. ``config`` is used as the positions
    of an ``Atoms('O2H5', ...)`` object.

    Returns ``model.predict(X)[0][0]``, or ``None`` for any other
    descriptor type.
    """
    metadata = data['metadata']
    model = metadata[3]['best_model_fitted']
    descriptor_type = metadata[1]['descriptor_type']

    # Pick the callable that turns an Atoms object into model input.
    if descriptor_type == 'Coulomb_matrix':
        featurize = CoulombMatrix(
            n_atoms_max=7,
            flatten=True,
            permutation='sorted_l2').create
    elif descriptor_type == 'PIV':
        featurize = metadata[1]['descriptor']
    else:
        return None

    features = featurize(Atoms('O2H5', positions=config))
    return model.predict(features)[0][0]
Example #13
0
    def test_distribution(self):
        """Tests if the random sorting obeys a gaussian distribution. Can
        rarely fail when everything is OK.

        The observed frequency with which the first two rows of the randomly
        sorted matrix appear with row 0 having the smaller norm is compared,
        within an absolute tolerance of 1e-2, against the probability
        computed from the normal cumulative distribution function.
        """
        # Get the mean value to compare to: the norms of the first two rows
        # of the sorted_l2 matrix, taken in the order the descriptor returns.
        sigma = 5
        desc = CoulombMatrix(n_atoms_max=5, permutation="sorted_l2", flatten=False)
        cm = desc.create(HHe)
        means = np.linalg.norm(cm, axis=1)
        mu2 = means[0]
        mu1 = means[1]

        # Measures how many times the two rows with biggest norm exchange place
        # when random noise is added. This should correspond to the probability
        # P(X > Y), where X = N(\mu_1, \sigma^2), Y = N(\mu_2, \sigma^2). This
        # probability can be reduced to P(X > Y) = P(X-Y > 0) = P(N(\mu_1 -
        # \mu_2, \sigma^2 + sigma^2) > 0). See e.g.
        # https://en.wikipedia.org/wiki/Sum_of_normally_distributed_random_variables
        desc = CoulombMatrix(n_atoms_max=5, permutation="random", sigma=sigma, flatten=False)
        count = 0
        rand_instances = 20000
        for _ in range(rand_instances):
            cm = desc.create(HHe)
            if np.linalg.norm(cm[0]) < np.linalg.norm(cm[1]):
                count += 1

        # The expected probability is calculated from the cumulative
        # distribution function.
        expected = 1 - scipy.stats.norm.cdf(0, mu1 - mu2, np.sqrt(sigma**2 + sigma**2))
        observed = count/rand_instances

        self.assertTrue(abs(expected - observed) <= 1e-2)
Example #14
0
class Global_Descriptor_CM(Global_Descriptor_Base):
    """Global descriptor that wraps the DScribe Coulomb matrix (CM)."""

    def __init__(self, desc_spec):
        """
        Make a DScribe CM object from a specification dict.

        Parameters
        ----------
        desc_spec: dict. Must contain ``"type": "CM"`` and ``"max_atoms"``.
                   An optional ``"periodic"`` entry must not be equal to True.

        Raises
        ------
        ValueError: if the type is missing or is not "CM", if ``max_atoms``
                    is missing, or if a periodic system is requested.
        """

        # Imported inside the method, as in the original code.
        from dscribe.descriptors import CoulombMatrix

        if "type" not in desc_spec or desc_spec["type"] != "CM":
            raise ValueError(
                "Type is not CM or cannot find the type of the descriptor")

        # required
        try:
            self.max_atoms = desc_spec['max_atoms']
        except KeyError as err:
            # Only a missing key is translated; anything else (including
            # KeyboardInterrupt/SystemExit) is no longer swallowed.
            raise ValueError(
                "Not enough information to initialize the `Global_Descriptor_CM` object"
            ) from err

        # Equality (not truthiness) is kept on purpose so that only values
        # equal to True are rejected.
        if 'periodic' in desc_spec and desc_spec['periodic'] == True:
            raise ValueError(
                "Coulomb Matrix cannot be used for periodic systems")

        self.cm = CoulombMatrix(self.max_atoms)
        print("Using CoulombMatrix ...")
        # make an acronym
        self.acronym = "CM" + "-" + str(self.max_atoms)

    def create(self, frame):
        """
        Compute the CM descriptor for a frame.

        Parameters
        ----------
        frame: ASE atom object. Coordinates of a frame.

        Returns
        -------
        desc_dict: a dictionary with two entries, the acronym and the output
                   of the DScribe ``create`` call for this frame,
                   i.e. {'acronym': 'CM-*', 'descriptors': <CM output>}
        atomic_desc_dict : {}, always empty for this global descriptor.

        Raises
        ------
        ValueError: if the frame has more atoms than ``max_atoms``.
        """
        if len(frame.get_positions()) > self.max_atoms:
            raise ValueError(
                'the size of the system is larger than the max_atoms of the CM descriptor'
            )
        # notice that we return an empty dictionary for "atomic descriptors"
        return {
            'acronym': self.acronym,
            'descriptors': self.cm.create(frame, n_jobs=1)
        }, {}
Example #15
0
    def test_match_with_sorted(self):
        """Checks that re-sorting a randomly sorted Coulomb matrix gives the
        same result as the ``sorted_l2`` descriptor.
        """
        random_desc = CoulombMatrix(
            n_atoms_max=5, permutation="random", sigma=100, flatten=False
        )
        randomized = random_desc.create(H2O)
        resorted = random_desc.sort(randomized)

        sorted_desc = CoulombMatrix(
            n_atoms_max=5, permutation="sorted_l2", flatten=False
        )
        reference = sorted_desc.create(H2O)

        self.assertTrue(np.array_equal(reference, resorted))
Example #16
0
 def test_exceptions(self):
     """Tests different invalid parameters that should raise an
     exception.

     Covers an unknown permutation, a negative ``n_atoms_max``, and a
     ``create`` call on ``[HHe, H2O]`` with ``n_atoms_max=2``.
     """
     with self.assertRaises(ValueError):
         CoulombMatrix(n_atoms_max=5, permutation="unknown")
     with self.assertRaises(ValueError):
         CoulombMatrix(n_atoms_max=-1)

     # Construct outside the assertRaises block so that only `create` can
     # satisfy the expectation; a ValueError from the constructor would
     # otherwise let the test pass for the wrong reason.
     cm = CoulombMatrix(n_atoms_max=2)
     with self.assertRaises(ValueError):
         cm.create([HHe, H2O])
Example #17
0
    def test_flatten(self):
        """Checks the output shape with and without flattening."""
        # (flatten flag, expected output shape): unflattened first
        cases = (
            (False, (5, 5)),
            (True, (25,)),
        )
        for flatten, expected_shape in cases:
            descriptor = CoulombMatrix(
                n_atoms_max=5, permutation="sorted_l2", flatten=flatten
            )
            output = descriptor.create(H2O)
            self.assertEqual(output.shape, expected_shape)
Example #18
0
def setupDescs(structs, indexs, level, descname, chemsyms_uniques, n_atoms,
               steve, v):
    """
    Setup descriptor and run it for ASE structures.

    Parameters
    ----------
    structs: the structures handed to the descriptor's ``create``; must
             expose ``.shape`` when ``v`` is truthy.
    indexs: index used for the returned DataFrame.
    level: label that is only used in the verbose message.
    descname: one of "CM", "MBTR" or "SOAP".
    chemsyms_uniques: species passed to MBTR and SOAP.
    n_atoms: ``n_atoms_max`` for the Coulomb matrix.
    steve: number of parallel jobs (``n_jobs``) for ``create``.
    v: if truthy, print a summary line.

    Returns
    -------
    (descs_df, n_feat): DataFrame built from the descriptor output, and the
    number of features reported by the descriptor.

    Raises
    ------
    ValueError: if ``descname`` is not one of the supported names.

    The MBTR and SOAP settings (mk1, mk2, mk3, srcut, snmax, slmax) are read
    from names defined outside this function.
    """
    # choose the descriptor
    if descname == "CM":
        # permutation = 'sorted_l2' is default
        desc = CoulombMatrix(n_atoms_max=n_atoms, flatten=True)
    elif descname == "MBTR":
        desc = MBTR(species=chemsyms_uniques,
                    k1=mk1,
                    k2=mk2,
                    k3=mk3,
                    periodic=False,
                    normalization="l2_each",
                    flatten=True)
    elif descname == "SOAP":
        desc = SOAP(species=chemsyms_uniques,
                    periodic=False,
                    rcut=srcut,
                    nmax=snmax,
                    lmax=slmax,
                    average=True)  # Averaging for global
    else:
        # Previously an unknown name left `desc` unbound and failed later
        # with an UnboundLocalError.
        raise ValueError(
            "Unknown descriptor name {!r}; expected 'CM', 'MBTR' or 'SOAP'".
            format(descname))

    n_feat = desc.get_number_of_features()

    # Create descriptors
    descs = desc.create(structs, n_jobs=steve)  # Parallel

    # Create a DF of returned `list` of `arrays` of descs
    descs_df = pd.DataFrame(descs, index=indexs)

    if v:
        print("""🔘 Created {}-descriptors for all {} {}-structures.
    Number of features in {}: {}""".format(descname, structs.shape[0], level,
                                           descname, n_feat))

    return descs_df, n_feat
Example #19
0
    def test_flatten(self):
        """Checks that the eigenspectrum output has shape (5,) whether or not
        flattening is requested.
        """
        # Unflattened first, then flattened
        for flatten in (False, True):
            descriptor = CoulombMatrix(
                n_atoms_max=5, permutation="eigenspectrum", flatten=flatten
            )
            spectrum = descriptor.create(H2O)
            self.assertEqual(spectrum.shape, (5,))
Example #20
0
    def test_sparse(self):
        """Checks the output type for dense and sparse settings.
        """
        shared = dict(n_atoms_max=5, permutation="none")

        # Dense
        dense_desc = CoulombMatrix(flatten=False, sparse=False, **shared)
        dense_out = dense_desc.create(H2O)
        self.assertTrue(type(dense_out) == np.ndarray)

        # Sparse
        sparse_desc = CoulombMatrix(flatten=True, sparse=True, **shared)
        sparse_out = sparse_desc.create(H2O)
        self.assertTrue(type(sparse_out) == scipy.sparse.coo_matrix)
Example #21
0
    def test_sparse(self):
        """Checks the output type for dense and sparse settings."""
        shared = dict(n_atoms_max=5, permutation="random", sigma=100)

        # Dense
        dense_desc = CoulombMatrix(flatten=False, sparse=False, **shared)
        dense_out = dense_desc.create(H2O)
        self.assertTrue(type(dense_out) == np.ndarray)

        # Sparse
        sparse_desc = CoulombMatrix(flatten=True, sparse=True, **shared)
        sparse_out = sparse_desc.create(H2O)
        self.assertTrue(type(sparse_out) == sparse.COO)
Example #22
0
    def test_batch_create(self):
        """Runs ``batch_create`` once with a global and once with a local
        descriptor; the test only requires that both calls complete.
        """
        samples = [molecule(name) for name in ("H2O", "C6H6")]

        # Test with global descriptor
        global_descriptor = CoulombMatrix(n_atoms_max=12, permutation="sorted_l2")
        batch_create(global_descriptor, samples, 2)

        # Test with local descriptor
        soap_settings = dict(
            atomic_numbers=[1, 6, 8],
            rcut=5,
            nmax=3,
            lmax=3,
            sigma=1,
            periodic=False,
            crossover=True,
            average=False,
            sparse=True,
        )
        local_descriptor = SOAP(**soap_settings)
        centers = [[0], [1]]
        batch_create(local_descriptor, samples, positions=centers, n_proc=2)
Example #23
0
    def test_flatten(self):
        """Checks the output shape with and without flattening.
        """
        # (flatten flag, expected output shape): unflattened first
        cases = (
            (False, (5, 5)),
            (True, (1, 25)),
        )
        for flatten, expected_shape in cases:
            descriptor = CoulombMatrix(
                n_atoms_max=5, permutation="random", sigma=100, flatten=flatten
            )
            output = descriptor.create(H2O)
            self.assertEqual(output.shape, expected_shape)
Example #24
0
# Demo script: builds Coulomb-matrix descriptors for a methanol molecule with
# three different CoulombMatrix settings and prints the first two results.
from dscribe.descriptors import CoulombMatrix

# NOTE(review): these four values are not used anywhere in the visible part
# of this script.
atomic_numbers = [1, 8]
rcut = 6.0
nmax = 8
lmax = 6

# Setting up the CM descriptor
cm = CoulombMatrix(n_atoms_max=6, )

# Creating an atomic system as an ase.Atoms-object
from ase.build import molecule
methanol = molecule("CH3OH")
print(methanol)

# Create CM output for the system
cm_methanol = cm.create(methanol)

print(cm_methanol)
print("flattened", cm_methanol.shape)

# No flattening
cm = CoulombMatrix(n_atoms_max=6, flatten=False)
cm_methanol = cm.create(methanol)

print(cm_methanol)
print("not flattened", cm_methanol.shape)

# Introduce zero-padding
# (n_atoms_max is raised from 6 to 10; this last result is not printed here)
cm = CoulombMatrix(n_atoms_max=10, flatten=False)
cm_methanol = cm.create(methanol)
Example #25
0
# Module-level setup: imports plus one CoulombMatrix and one SOAP descriptor
# instance that are created at import time.
import torch
from ase.build import bulk
from ase import Atom, Atoms
import random, pickle
import numpy as np
from ase.formula import Formula

from dscribe.descriptors import CoulombMatrix
# Coulomb-matrix descriptor sized for systems of up to 50 atoms
cm_dscrb = CoulombMatrix(n_atoms_max=50, )

from dscribe.descriptors import SOAP

# Arguments passed to the SOAP constructor below
species = ["H", "C", "O", "N", "F"]
rcut = 6.0
nmax = 8
lmax = 6

# Setting up the SOAP descriptor
soap = SOAP(
    species=species,
    periodic=False,
    rcut=rcut,
    nmax=nmax,
    lmax=lmax,
)

from sklearn.decomposition import PCA
# pca = PCA(n_components=600)

# pca = None
# NOTE(review): presumably a random seed; it is not used in the visible lines.
seed = 1234
Example #26
0
PySpark API.
"""
# NOTE(review): `pseq` is imported but not used in the visible lines.
from functional.streams import ParallelStream as pseq

from collections import namedtuple

import ase.build.bulk

from dscribe.descriptors import CoulombMatrix
from dscribe.descriptors import SineMatrix
from dscribe.descriptors import EwaldMatrix

# Setup the descriptors
# All three matrix descriptors share the same n_atoms_max.
n_atoms_max = 4
# NOTE(review): presumably the number of worker processes; not used in the
# visible lines.
n_proc = 4
coulombmatrix = CoulombMatrix(n_atoms_max=n_atoms_max)
sinematrix = SineMatrix(n_atoms_max=n_atoms_max)
ewaldmatrix = EwaldMatrix(n_atoms_max=n_atoms_max)

# Define a dataset
# Maps a label to the structure returned by ase.build.bulk for it.
data = {
    "NaCl": ase.build.bulk("NaCl", "rocksalt", 5.64),
    "Diamond": ase.build.bulk("C", "diamond", 3.567),
    "Al": ase.build.bulk("Al", "fcc", 4.046),
    "GaAs": ase.build.bulk("GaAs", "zincblende", 5.653),
}

# Setup an iterable that runs through the samples.
# Result has the fields cm, sm and em; Sample pairs a dataset key with its
# structure.
Result = namedtuple("Result", "cm sm em")
Sample = namedtuple("Sample", "key value")
samples = [Sample(key, value) for key, value in data.items()]
Example #27
0
# Load configuration from an XYZ file with ASE. See
# "https://wiki.fysik.dtu.dk/ase/ase/io/io.html" for a list of supported file
# formats.
atoms = ase.io.read("nacl.xyz")
# The cell is set explicitly after reading the file.
atoms.set_cell([5.640200, 5.640200, 5.640200])
# The initial charges are set to the atomic numbers.
atoms.set_initial_charges(atoms.get_atomic_numbers())

# There are utilities for automatically detecting statistics for ASE Atoms
# objects. Typically some statistics are needed for the descriptors in order to
# e.g. define a proper zero-padding
stats = system_stats([atoms])
n_atoms_max = stats["n_atoms_max"]
atomic_numbers = stats["atomic_numbers"]

# Create descriptors for this system directly from the ASE atoms
# Both matrix descriptors use the detected n_atoms_max and sorted_l2 ordering.
cm = CoulombMatrix(n_atoms_max, permutation="sorted_l2").create(atoms)
sm = SineMatrix(n_atoms_max, permutation="sorted_l2").create(atoms)
mbtr = MBTR(atomic_numbers,
            k=[1, 2, 3],
            periodic=True,
            weighting={
                "k2": {
                    "function": "exponential",
                    "scale": 0.5,
                    "cutoff": 1e-3
                },
                "k3": {
                    "function": "exponential",
                    "scale": 0.5,
                    "cutoff": 1e-3
                },
Example #28
0
# Quick-start script: creates Coulomb-matrix and SOAP descriptors for three
# small molecules, first for a single system and then for the whole list.
import numpy as np
from ase.build import molecule
from dscribe.descriptors import SOAP
from dscribe.descriptors import CoulombMatrix

# Define atomic structures
samples = [molecule("H2O"), molecule("NO2"), molecule("CO2")]

# Setup descriptors
cm_desc = CoulombMatrix(n_atoms_max=3, permutation="sorted_l2")
soap_desc = SOAP(species=["C", "H", "O", "N"], rcut=5, nmax=8, lmax=6, crossover=True)

# Create descriptors as numpy arrays or sparse arrays
water = samples[0]
coulomb_matrix = cm_desc.create(water)
# SOAP is evaluated only at the position index 0 of the water molecule.
soap = soap_desc.create(water, positions=[0])

# Easy to use also on multiple systems, can be parallelized across processes
# The second call repeats the first with n_jobs=3 and overwrites its result.
coulomb_matrices = cm_desc.create(samples)
coulomb_matrices = cm_desc.create(samples, n_jobs=3)
# For every sample, the indices of the atoms with atomic number 8 (oxygen)
oxygen_indices = [np.where(x.get_atomic_numbers() == 8)[0] for x in samples]
oxygen_soap = soap_desc.create(samples, oxygen_indices, n_jobs=3)

# Some descriptors also allow calculating derivatives with respect to atomic
# positions
der, des = soap_desc.derivatives(samples, method="auto", return_descriptor=True)
Example #29
0
# Convert the HOMO values to a plain list of Python floats.
# (The identifier had been mangled to `h**o`, which is not a valid
# assignment target.)
homo = np.array(homo_array)
homo = [float(x) for x in homo]
#print(homo_train)
# Read every frame of the XYZ file into a list of ASE objects.
ase_mol = list(ase.io.iread(out_mol, format="xyz"))

## Load statistics from the dataset
stats = system_stats(ase_mol)
atomic_numbers = stats["atomic_numbers"]
max_atomic_number = stats["max_atomic_number"]
min_atomic_number = stats["min_atomic_number"]
min_distance = stats["min_distance"]

cm_desc = CoulombMatrix(
    n_atoms_max=
    29,  ## maximum number of atoms in a molecule that occurs in dataset
    permutation="sorted_l2",
    #sparse=True
)

# time_start marks the start of the whole run; cm_start/cm_end bracket only
# the descriptor creation.
time_start = time.time()
cm_start = time.time()
############# create CM for data ##############################################################################
cm = cm_desc.create(ase_mol)
cm_end = time.time()
# Wall-clock seconds spent creating the Coulomb matrices, rounded to 3 decimals
cm_time = np.round(cm_end - cm_start, decimals=3)

################# split CM and homo array into 5 different parts

### define index
index = np.arange(np.shape(cm)[0])
### shuffle index
Example #30
0
 def create(system):
     """Return the flattened, unsorted Coulomb matrix (n_atoms_max=3) of
     ``system``.
     """
     settings = {"n_atoms_max": 3, "permutation": "none", "flatten": True}
     descriptor = CoulombMatrix(**settings)
     return descriptor.create(system)