def test_flatten(self):
    """Check the output shape with ``flatten=False`` and ``flatten=True``.

    The same descriptor instance is reused: it is first created with
    ``flatten=False`` and then switched through the ``flatten`` property.
    """
    n_bins = 10
    water = H2O
    species_count = len(set(water.get_atomic_numbers()))
    k1_setup = {
        "grid": {"n": n_bins, "min": 1, "max": 8, "sigma": 0.1},
        "geometry": {"function": "atomic_number"},
    }

    # Non-flattened: the result is indexed by term name and the k1 entry has
    # one row per species.
    desc = MBTR(
        species=[1, 8],
        k1=k1_setup,
        periodic=False,
        flatten=False,
        sparse=False,
    )
    unflattened = desc.create(water)["k1"]
    self.assertEqual(unflattened.shape, (species_count, n_bins))

    # Flattened: the leading dimension is kept even though it has length 1
    # (the original note attributes this to sparse matrices being 2D only).
    desc.flatten = True
    flattened = desc.create(water)
    self.assertEqual(flattened.shape, (1, species_count * n_bins))
def test_k3_geoms_finite_concave(self):
    """Verify the k=3 geometry values for four atoms in a dart-like shape.

    The arrangement contains both concave and convex angles.
    """
    def cos_deg(degrees):
        # Cosine of an angle given in degrees.
        return math.cos(degrees/180*math.pi)

    root2 = np.sqrt(2)
    dart = Atoms(
        positions=[
            [0, 0, 0],
            [root2, root2, 0],
            [2*root2, 0, 0],
            [root2, np.tan(np.pi/180*15)*root2, 0],
        ],
        symbols=["H", "H", "H", "He"],
    )

    mbtr = MBTR([1, 2, 10], k=[3], grid=default_grid, periodic=False)
    mbtr.create(dart)
    angles = mbtr._k3_geoms

    # A finite system is expected to hold n*(n-1)*(n-2)/2 unique angles.
    n_atoms = len(dart)
    n_angles = sum(len(values) for values in angles.values())
    self.assertEqual(n_atoms*(n_atoms-1)*(n_atoms-2)/2, n_angles)

    # Expected cosines, keyed by the species-index triplet.
    assumed = {
        (0, 1, 0): [cos_deg(105), cos_deg(150), cos_deg(105)],
        (0, 0, 0): [cos_deg(90), cos_deg(45), cos_deg(45)],
        (0, 0, 1): [
            cos_deg(45),
            cos_deg(30),
            cos_deg(45),
            cos_deg(30),
            cos_deg(15),
            cos_deg(15),
        ],
    }
    self.dict_comparison(angles, assumed)
def test_k2_weights_and_geoms_periodic(self):
    """Verify the k=2 geometry and weight values for a periodic H-C pair.

    The expected geometry values are inverse distances and the expected
    weights follow exp(-0.8 * distance), halved for the two distances that
    involve a lattice translation.
    """
    atoms = Atoms(
        cell=[
            [10, 0, 0],
            [10, 10, 0],
            [10, 0, 10],
        ],
        symbols=["H", "C"],
        scaled_positions=[
            [0.1, 0.5, 0.5],
            [0.9, 0.5, 0.5],
        ],
    )
    k2_weighting = {"function": "exponential", "scale": 0.8, "cutoff": 1e-3}
    mbtr = MBTR(
        [1, 6],
        k=[2],
        grid=default_grid,
        periodic=True,
        weighting={"k2": k2_weighting},
    )
    mbtr.create(atoms)
    weights = mbtr._k2_weights
    geoms = mbtr._k2_geoms

    # Distances: inside the cell, and across the first lattice vector in
    # both directions.
    pos = atoms.get_positions()
    lattice_a = atoms.get_cell()[0, :]
    distances = np.array([
        np.linalg.norm(pos[0] - pos[1]),
        np.linalg.norm(pos[0] - pos[1] + lattice_a),
        np.linalg.norm(pos[1] - pos[0] - lattice_a),
    ])

    # Geometry values are compared against the inverse distances.
    self.dict_comparison(geoms, {(0, 1): 1/distances})

    # Weights: the periodic distances are halved.
    weight_list = np.exp(-0.8*distances)
    weight_list[1:3] /= 2
    self.dict_comparison(weights, {(0, 1): weight_list})
def test_k3_periodic_cell_translation(self):
    """Check that the k=3 spectrum is unchanged by a translation in a periodic cell.

    Per the original note, this only holds when contributions between
    periodic neighbours are weighted correctly; the individual geometry and
    weight values differ between the two systems before they are summed into
    the final spectrum.
    """
    # Four hydrogens, split in two pairs by a cell wall.
    separated = Atoms(
        cell=[
            [10, 0, 0],
            [0, 10, 0],
            [0, 0, 10],
        ],
        symbols=["H", "H", "H", "H"],
        scaled_positions=[
            [0.1, 0.50, 0.5],
            [0.1, 0.60, 0.5],
            [0.9, 0.50, 0.5],
            [0.9, 0.60, 0.5],
        ],
        pbc=True,
    )

    # The same system shifted so that the atoms sit next to each other.
    adjacent = separated.copy()
    adjacent.translate([5, 0, 0])
    adjacent.wrap()

    k3_grid = {"min": -1, "max": 1, "sigma": 0.01, "n": 200}
    k3_weighting = {"function": "exponential", "scale": 1, "cutoff": 1e-3}
    mbtr = MBTR(
        [1],
        k=[3],
        grid={"k3": k3_grid},
        periodic=True,
        weighting={"k3": k3_weighting},
    )

    # Both spectra should be identical within the absolute tolerance.
    spectrum_separated = mbtr.create(separated).toarray()[0, :]
    spectrum_adjacent = mbtr.create(adjacent).toarray()[0, :]
    identical = np.allclose(
        spectrum_separated, spectrum_adjacent, rtol=0, atol=1e-8
    )
    self.assertTrue(identical)
def test_k2_weights_and_geoms_finite(self):
    """Verify the k=2 weight and geometry values for a finite water molecule."""
    def k2_values(system):
        # Build a fresh descriptor, run it and return (weights, geoms).
        descriptor = MBTR([1, 8], k=[2], grid=default_grid, periodic=False)
        descriptor.create(system)
        return descriptor._k2_weights, descriptor._k2_geoms

    weights, geoms = k2_values(H2O)

    # Every pair is expected to have unit weight.
    self.dict_comparison(weights, {(0, 0): [1], (0, 1): [1, 1]})

    # The geometry values are expected to be inverse distances.
    pos = H2O.get_positions()
    inverse_hh = 1/np.linalg.norm(pos[0] - pos[2])
    inverse_ho = 1/np.linalg.norm(pos[0] - pos[1])
    self.dict_comparison(
        geoms,
        {(0, 0): [inverse_hh], (0, 1): 2*[inverse_ho]},
    )

    # The same molecule with a different atom indexing must agree.
    weights2, geoms2 = k2_values(H2O_2)
    self.dict_comparison(geoms, geoms2)
    self.dict_comparison(weights, weights2)
def test_k1_weights_and_geoms_finite(self):
    """Verify the k=1 weight and geometry values for a finite water molecule."""
    reference = MBTR([1, 8], k=[1], grid=default_grid, periodic=False)
    reference.create(H2O)
    weights, geoms = reference._k1_weights, reference._k1_geoms

    # Unit weight for each atom, grouped by species index.
    self.dict_comparison(weights, {(0,): [1, 1], (1,): [1]})

    # The geometry value of each atom is its atomic number.
    self.dict_comparison(geoms, {(0,): [1, 1], (1,): [8]})

    # The same molecule with a different atom indexing must agree. This
    # descriptor is given an explicit k1 grid instead of default_grid.
    explicit_grid = {"k1": {"min": 1, "max": 8, "sigma": 0.1, "n": 10}}
    reindexed = MBTR([1, 8], k=[1], grid=explicit_grid, periodic=False)
    reindexed.create(H2O_2)
    self.dict_comparison(weights, reindexed._k1_weights)
    self.dict_comparison(geoms, reindexed._k1_geoms)
def test_k3_weights_and_geoms_finite(self):
    """Tests that all the correct angles are present in finite systems.

    There should be n*(n-1)*(n-2)/2 unique angles where the division by two
    gets rid of duplicate angles.
    """
    # Test with water molecule
    mbtr = MBTR([1, 8], k=[3], grid=default_grid, periodic=False)
    mbtr.create(H2O)
    geoms = mbtr._k3_geoms
    weights = mbtr._k3_weights

    # Expected cosines: one H-O-H angle and two H-H-O angles.
    assumed_geoms = {
        (0, 1, 0): 1*[math.cos(104/180*math.pi)],
        (0, 0, 1): 2*[math.cos(38/180*math.pi)],
    }
    self.dict_comparison(geoms, assumed_geoms)

    assumed_weights = {
        (0, 1, 0): [1],
        (0, 0, 1): [1, 1],
    }
    self.dict_comparison(weights, assumed_weights)

    # Test against system with different indexing. The values are floats
    # computed from differently ordered atoms, so they are compared with
    # dict_comparison, exactly like the k=1 and k=2 variants of this test,
    # instead of relying on exact equality of the containers.
    mbtr = MBTR([1, 8], k=[3], grid=default_grid, periodic=False)
    mbtr.create(H2O_2)
    weights2 = mbtr._k3_weights
    geoms2 = mbtr._k3_geoms
    self.dict_comparison(weights, weights2)
    self.dict_comparison(geoms, geoms2)
def create(data):
    """Compute MBTR descriptors for one part of the data set.

    Intended to be run by each worker process on its own part of the data.

    Args:
        data: Indexable pair; ``data[0]`` is the part index and ``data[1]``
            the samples. Each sample exposes the atomic system as ``.value``.

    Returns:
        dict: ``{"part": <part index>, "mbtr": <lil_matrix>}`` with one row
        per sample.
    """
    part_index, part_samples = data[0], data[1]

    grid = {
        "k1": {
            "min": min(atomic_numbers) - 1,
            "max": max(atomic_numbers) + 1,
            "sigma": 0.1,
            "n": 100,
        },
        "k2": {
            "min": 0,
            "max": 1 / min_distance,
            "sigma": 0.01,
            "n": 100,
        },
    }
    weighting = {
        "k2": {
            "function": lambda x: np.exp(-0.5 * x),
            "threshold": 1e-3
        },
    }
    mbtr = MBTR(
        atomic_numbers=atomic_numbers,
        k=[1, 2],
        periodic=True,
        grid=grid,
        weighting=weighting,
        flatten=True,
    )

    # One row per sample, one column per feature.
    shape = (len(part_samples), int(mbtr.get_number_of_features()))
    mbtr_inputs = lil_matrix(shape)
    for row, sample in enumerate(part_samples):
        mbtr_inputs[row, :] = mbtr.create(sample.value)

    return {
        "part": part_index,
        "mbtr": mbtr_inputs,
    }
def test_periodic_supercell_similarity(self):
    """Check that different cells of one crystal give the same normalized spectrum."""
    decay = 1
    exponential = {"function": "exponential", "scale": decay, "cutoff": 1e-3}
    desc = MBTR(
        species=["H"],
        periodic=True,
        k1={
            "geometry": {"function": "atomic_number"},
            "grid": {"min": 0, "max": 2, "sigma": 0.1, "n": 100},
        },
        k2={
            "geometry": {"function": "inverse_distance"},
            "grid": {"min": 0, "max": 1.0, "sigma": 0.02, "n": 200},
            "weighting": dict(exponential),
        },
        k3={
            "geometry": {"function": "cosine"},
            "grid": {"min": -1.0, "max": 1.0, "sigma": 0.02, "n": 200},
            "weighting": dict(exponential),
        },
        flatten=True,
        sparse=False,
        normalization="l2_each",
    )

    # Four descriptions of the same FCC structure.
    primitive = bulk('H', 'fcc', a=2.0)
    supercell = primitive*[2, 2, 2]
    orthorhombic = bulk('H', 'fcc', a=2.0, orthorhombic=True)
    conventional = bulk('H', 'fcc', a=2.0, cubic=True)
    cells = [primitive, supercell, orthorhombic, conventional]
    output = desc.create(cells)

    # Every row is compared with the primitive-cell row; the first iteration
    # compares that row with itself.
    for row in range(len(cells)):
        matches = np.allclose(output[0, :], output[row, :], atol=1e-5, rtol=0)
        self.assertTrue(matches)
def test_flatten(self):
    """Check the k1 output shape with and without flattening."""
    system = H2O
    n = 10
    n_species = len(set(system.get_atomic_numbers()))

    def k1_grid():
        # A fresh grid dictionary per descriptor, as in separate literals.
        return {"k1": {"n": n, "min": 1, "max": 8, "sigma": 0.1}}

    # Non-flattened: the k1 entry has one row per species.
    nested = MBTR(
        species=[1, 8],
        k=[1],
        grid=k1_grid(),
        periodic=False,
        flatten=False,
        sparse=False,
    )
    self.assertEqual(nested.create(system)["k1"].shape, (n_species, n))

    # Flattened (constructor defaults): the leading dimension is kept even
    # though it has length 1, because the sparse output is always 2D.
    flat = MBTR(species=[1, 8], k=[1], grid=k1_grid(), periodic=False)
    self.assertEqual(flat.create(system).shape, (1, n_species * n))
def create(system):
    """Create the flattened k=1,2,3 MBTR output for ``system``.

    Args:
        system: The atomic system passed on to ``MBTR.create``.

    Returns:
        Whatever ``MBTR.create`` returns for the given system.
    """
    grid = {
        "k1": {"min": 10, "max": 18, "sigma": 0.1, "n": 100},
        "k2": {"min": 0, "max": 0.7, "sigma": 0.01, "n": 100},
        "k3": {"min": -1.0, "max": 1.0, "sigma": 0.05, "n": 100},
    }
    # k2 and k3 use the same exponential weighting settings; each term gets
    # its own dictionary.
    weighting = {
        term: {"function": "exponential", "scale": 0.5, "cutoff": 1e-3}
        for term in ("k2", "k3")
    }
    desc = MBTR(
        atomic_numbers=[1, 8],
        k=[1, 2, 3],
        periodic=False,
        grid=grid,
        weighting=weighting,
        flatten=True,
    )
    return desc.create(system)
def test_parallel_sparse(self):
    """Check that sparse output for several systems matches per-system output.

    The comparison is made once as a serial job and once with two jobs.
    """
    samples = [molecule("CO"), molecule("N2O")]
    desc = MBTR(
        species=[6, 7, 8],
        k={1, 2},
        grid={
            "k1": {"min": 1, "max": 8, "sigma": 0.1, "n": 100},
            "k2": {"min": 0, "max": 1 / 0.7, "sigma": 0.1, "n": 100},
        },
        weighting={
            "k2": {"function": "exponential", "scale": 0.5, "cutoff": 1e-2},
        },
        periodic=False,
        flatten=True,
        sparse=True,
    )
    n_features = desc.get_number_of_features()

    # n_jobs=1 is the serial case, n_jobs=2 the parallel one.
    for n_jobs in (1, 2):
        output = desc.create(system=samples, n_jobs=n_jobs).toarray()

        # Reference rows, computed one system at a time.
        assumed = np.empty((2, n_features))
        for row, sample in enumerate(samples):
            assumed[row, :] = desc.create(sample).toarray()
        self.assertTrue(np.allclose(output, assumed))
def test_grid_change(self):
    """Check that MBTR can be recalculated with new grid settings.

    Narrowing sigma must change the spectrum values while leaving the peak
    positions where they were.
    """
    ranges = (("k1", 1, 8), ("k2", 0, 1/0.7), ("k3", -1, 1))
    grid = {
        term: {"min": lower, "max": upper, "sigma": 0.1, "n": 50}
        for term, lower, upper in ranges
    }
    weighting = {
        term: {"function": "exponential", "scale": 1, "cutoff": 1e-4}
        for term in ("k2", "k3")
    }
    desc = MBTR(
        atomic_numbers=[1, 8],
        k=[1, 2, 3],
        periodic=True,
        grid=grid,
        weighting=weighting,
        flatten=True,
    )

    # The scalar values are initialized once for the system.
    desc.initialize_scalars(H2O)

    # First with the constructor grid, then with a narrower sigma.
    spectrum1 = desc.create_with_grid().toarray()[0]
    for term in ("k1", "k2", "k3"):
        grid[term]["sigma"] = 0.09
    spectrum2 = desc.create_with_grid(grid).toarray()[0]

    # The values differ, the detected peak indices do not.
    self.assertFalse(np.allclose(spectrum1, spectrum2))
    peaks_before = find_peaks_cwt(spectrum1, [5])
    peaks_after = find_peaks_cwt(spectrum2, [5])
    self.assertTrue(np.array_equal(peaks_before, peaks_after))
def test_k3_peaks_periodic(self):
    """Check the k=3 peak locations and intensities in a periodic system.

    Three hydrogens are placed so that one of them lies across a cell wall
    from the other two. Peaks are expected at 45 and 90 degrees, with
    exponentially weighted intensities that are halved because the atoms
    belong to different cells.
    """
    scale = 0.85
    k3_setup = {
        "geometry": {"function": "angle"},
        "grid": {"min": 0, "max": 180, "sigma": 5, "n": 2000},
        "weighting": {"function": "exp", "scale": scale, "cutoff": 1e-3},
    }
    desc = MBTR(
        species=["H"],
        k3=k3_setup,
        normalize_gaussians=False,
        periodic=True,
        flatten=True,
        sparse=False,
    )
    atoms = Atoms(
        cell=[
            [10, 0, 0],
            [0, 10, 0],
            [0, 0, 10],
        ],
        symbols=3*["H"],
        scaled_positions=[
            [0.05, 0.40, 0.5],
            [0.05, 0.60, 0.5],
            [0.95, 0.5, 0.5],
        ],
        pbc=True,
    )
    features = desc.create(atoms)[0, :]
    x = desc.get_k3_axis()

    # Expected peaks. The weight uses the total distance around the three
    # atoms; the intensities are then halved for the periodic contribution.
    perimeter = 2+2*np.sqrt(2)
    weight = np.exp(-scale*perimeter)
    assumed_locs = np.array([45, 90])
    assumed_ints = np.array([4*weight, 2*weight]) / 2

    # H-H-H peaks
    hhh_location = desc.get_location(("H", "H", "H"))
    hhh_feat = features[hhh_location]
    peak_ids = find_peaks(hhh_feat, prominence=0.01)[0]
    self.assertTrue(
        np.allclose(x[peak_ids], assumed_locs, rtol=0, atol=1e-1)
    )
    self.assertTrue(
        np.allclose(hhh_feat[peak_ids], assumed_ints, rtol=0, atol=1e-1)
    )

    # Nothing may remain outside of the H-H-H part.
    features[hhh_location] = 0
    self.assertEqual(features.sum(), 0)
def test_sparse(self):
    """Tests the sparse matrix creation.

    With ``sparse=False`` the output must be a numpy array and with
    ``sparse=True`` a scipy COO matrix.
    """
    # Dense
    desc = MBTR([1, 8], k=[1], grid=default_grid, periodic=False, flatten=True, sparse=False)
    vec = desc.create(H2O)
    # assertIsInstance replaces the `type(x) == T` comparison: it is the
    # idiomatic type check and reports the actual type on failure.
    self.assertIsInstance(vec, np.ndarray)

    # Sparse
    desc = MBTR([1, 8], k=[1], grid=default_grid, periodic=False, flatten=True, sparse=True)
    vec = desc.create(H2O)
    self.assertIsInstance(vec, scipy.sparse.coo_matrix)
def setupDescs(structs, indexs, level, descname, chemsyms_uniques, n_atoms, steve, v):
    """Set up the chosen descriptor and run it for the given structures.

    Args:
        structs: Structures passed to the descriptor's ``create``. When ``v``
            is true, ``structs.shape[0]`` is read for the status message.
        indexs: Index given to the resulting DataFrame.
        level: Label that is only used in the status message.
        descname: Descriptor to use: "CM", "MBTR" or "SOAP".
        chemsyms_uniques: Species passed to MBTR and SOAP.
        n_atoms: Passed to CoulombMatrix as ``n_atoms_max``.
        steve: Passed to ``create`` as ``n_jobs``.
        v: When true, print a status message.

    Returns:
        tuple: ``(descs_df, n_feat)``; the descriptors as a DataFrame and
        the number of features reported by the descriptor.

    Raises:
        ValueError: If ``descname`` is not one of the supported names.
            Previously this case failed later with an unbound local variable.
    """
    # Choose the descriptor
    if descname == "CM":
        # No permutation is given, so the descriptor's default applies
        # (noted as 'sorted_l2' in the original code).
        desc = CoulombMatrix(n_atoms_max=n_atoms, flatten=True)
    elif descname == "MBTR":
        desc = MBTR(species=chemsyms_uniques,
                    k1=mk1,
                    k2=mk2,
                    k3=mk3,
                    periodic=False,
                    normalization="l2_each",
                    flatten=True)
    elif descname == "SOAP":
        # average=True is set here (the original note reads "Averaging for
        # global").
        desc = SOAP(species=chemsyms_uniques,
                    periodic=False,
                    rcut=srcut,
                    nmax=snmax,
                    lmax=slmax,
                    average=True)
    else:
        raise ValueError(
            "Unknown descriptor name {!r}; expected 'CM', 'MBTR' or 'SOAP'".format(descname))
    n_feat = desc.get_number_of_features()

    # Create the descriptors; n_jobs is forwarded from `steve`
    descs = desc.create(structs, n_jobs=steve)

    # Collect the returned descriptors into a DataFrame
    descs_df = pd.DataFrame(descs, index=indexs)
    if v:
        print("""🔘 Created {}-descriptors for all {} {}-structures. Number of features in {}: {}""".format(descname, structs.shape[0], level, descname, n_feat))
    return descs_df, n_feat
def test_k3_peaks_finite(self):
    """Check the k=3 peak locations and intensities for a finite water molecule.

    With the angle geometry function and unit weighting, one H-H-O peak of
    intensity 2 is expected at 38 degrees and one H-O-H peak of intensity 1
    at 104 degrees. Everything else must be zero.
    """
    desc = MBTR(
        species=["H", "O"],
        k3={
            "geometry": {"function": "angle"},
            "grid": {"min": -10, "max": 180, "sigma": 5, "n": 2000},
            "weighting": {"function": "unity"},
        },
        normalize_gaussians=False,
        periodic=False,
        flatten=True,
        sparse=False,
    )
    features = desc.create(H2O)[0, :]
    x = desc.get_k3_axis()

    # (species triplet, expected peak locations, expected peak intensities)
    expectations = (
        (("H", "H", "O"), np.array([38]), np.array([2])),
        (("H", "O", "H"), np.array([104]), np.array([1])),
    )
    for triplet, assumed_locs, assumed_ints in expectations:
        part = features[desc.get_location(triplet)]
        peak_ids = find_peaks(part, prominence=0.5)[0]
        self.assertTrue(
            np.allclose(x[peak_ids], assumed_locs, rtol=0, atol=5e-2)
        )
        self.assertTrue(
            np.allclose(part[peak_ids], assumed_ints, rtol=0, atol=5e-2)
        )

    # After clearing the two checked parts nothing may remain.
    for triplet, _, _ in expectations:
        features[desc.get_location(triplet)] = 0
    self.assertEqual(features.sum(), 0)
def test_k2_peaks_periodic(self):
    """Check the k=2 peak locations and intensities in a periodic system."""
    atoms = Atoms(
        cell=[
            [10, 0, 0],
            [10, 10, 0],
            [10, 0, 10],
        ],
        symbols=["H", "C"],
        scaled_positions=[
            [0.1, 0.5, 0.5],
            [0.9, 0.5, 0.5],
        ],
    )
    k2_setup = {
        "geometry": {"function": "distance"},
        "grid": {"min": 0, "max": 10, "sigma": 0.5, "n": 1000},
        "weighting": {"function": "exp", "scale": 0.8, "cutoff": 1e-3},
    }
    desc = MBTR(
        species=["H", "C"],
        k2=k2_setup,
        normalize_gaussians=False,
        periodic=True,
        flatten=True,
        sparse=False,
    )
    features = desc.create(atoms)[0, :]
    x = desc.get_k2_axis()

    # Expected peaks at 2 and 8 angstrom with exponential weights.
    assumed_locs = np.array([2, 8])
    assumed_ints = np.exp(-0.8*np.array([2, 8]))
    # Two periodic distances contribute at 2 angstrom ...
    assumed_ints[0] *= 2
    # ... and each is halved because the atoms belong to different cells.
    assumed_ints[0] /= 2

    # H-C peaks
    hc_location = desc.get_location(("H", "C"))
    hc_feat = features[hc_location]
    peak_ids = find_peaks(hc_feat, prominence=0.001)[0]
    self.assertTrue(
        np.allclose(x[peak_ids], assumed_locs, rtol=0, atol=1e-2)
    )
    self.assertTrue(
        np.allclose(hc_feat[peak_ids], assumed_ints, rtol=0, atol=1e-2)
    )

    # Nothing may remain outside of the H-C part.
    features[hc_location] = 0
    self.assertEqual(features.sum(), 0)
def test_properties(self):
    """Check that changing the setup through properties takes effect."""
    # Start with H and O only.
    desc = MBTR(
        k=[1, 2, 3],
        grid=default_grid,
        periodic=False,
        species=[1, 8],
        sparse=False,
    )
    features_before = desc.get_number_of_features()
    output_before = desc.create(H2O)

    # Adding a species must change the feature count and the output width.
    desc.species = ["C", "H", "O"]
    features_after = desc.get_number_of_features()
    output_after = desc.create(molecule("CH3OH"))

    self.assertNotEqual(features_before, features_after)
    self.assertNotEqual(output_before.shape[1], output_after.shape[1])
def test_k2_peaks_finite(self):
    """Check the k=2 peak locations and intensities for a finite water molecule."""
    desc = MBTR(
        species=[1, 8],
        k2={
            "geometry": {"function": "distance"},
            "grid": {"min": -1, "max": 3, "sigma": 0.5, "n": 1000},
            "weighting": {"function": "unity"},
        },
        normalize_gaussians=False,
        periodic=False,
        flatten=True,
        sparse=False,
    )
    features = desc.create(H2O)[0, :]
    pos = H2O.get_positions()
    x = desc.get_k2_axis()

    # (species pair, expected peak location, expected peak intensity).
    # The H-H location is given as a list and the H-O one as a scalar.
    expectations = (
        (("H", "H"), [np.linalg.norm(pos[0] - pos[2])], [1]),
        (("H", "O"), np.linalg.norm(pos[0] - pos[1]), [2]),
    )
    for pair, assumed_loc, assumed_int in expectations:
        part = features[desc.get_location(pair)]
        peak_ids = find_peaks(part, prominence=0.5)[0]
        self.assertTrue(
            np.allclose(x[peak_ids], assumed_loc, rtol=0, atol=1e-2)
        )
        self.assertTrue(
            np.allclose(part[peak_ids], assumed_int, rtol=0, atol=1e-2)
        )

    # After clearing the two checked parts nothing may remain.
    for pair, _, _ in expectations:
        features[desc.get_location(pair)] = 0
    self.assertEqual(features.sum(), 0)
def test_k1_peaks_finite(self):
    """Tests the correct peak locations and intensities are found for the
    k=1 term.

    For water with the atomic_number geometry function, a peak of intensity
    2 is expected at 1 (two hydrogens) and a peak of intensity 1 at 8 (one
    oxygen). Everything else must be zero.
    """
    desc = MBTR(
        species=[1, 8],
        k1={
            "geometry": {"function": "atomic_number"},
            "grid": {"min": 0, "max": 9, "sigma": 0.5, "n": 1000}
        },
        normalize_gaussians=False,
        periodic=False,
        flatten=True,
        sparse=False
    )
    features = desc.create(H2O)[0, :]
    x = desc.get_k1_axis()

    # The location is requested with real one-element tuples. The previous
    # ("H") and ("O") were plain strings: the parentheses alone do not make a
    # tuple, which only worked because the symbols are one character long.
    h_location = desc.get_location(("H",))
    o_location = desc.get_location(("O",))

    # Check the H peaks
    h_feat = features[h_location]
    h_peak_indices = find_peaks(h_feat, prominence=1)[0]
    h_peak_locs = x[h_peak_indices]
    h_peak_ints = h_feat[h_peak_indices]
    self.assertTrue(np.allclose(h_peak_locs, [1], rtol=0, atol=1e-2))
    self.assertTrue(np.allclose(h_peak_ints, [2], rtol=0, atol=1e-2))

    # Check the O peaks
    o_feat = features[o_location]
    o_peak_indices = find_peaks(o_feat, prominence=1)[0]
    o_peak_locs = x[o_peak_indices]
    o_peak_ints = o_feat[o_peak_indices]
    self.assertTrue(np.allclose(o_peak_locs, [8], rtol=0, atol=1e-2))
    self.assertTrue(np.allclose(o_peak_ints, [1], rtol=0, atol=1e-2))

    # Check that everything else is zero
    features[h_location] = 0
    features[o_location] = 0
    self.assertEqual(features.sum(), 0)
def plotDescs(structs, indexs, level, descname, chemsyms, n_atoms, steve, v, path_output, save=True):
    """Plot the chosen descriptor and optionally save the figure(s).

    Args:
        structs: Structure(s) passed to the descriptor's ``create``.
        indexs: Label used in the plot titles; ``indexs[:-4]`` is used in the
            output file names (presumably to strip a file extension, to be
            confirmed against the caller).
        level: Not used by this function.
        descname: Descriptor to plot: "CM", "MBTR" or "SOAP". Any other value
            plots nothing.
        chemsyms: Chemical symbols; tick labels for CM, and the species (after
            removing duplicates) for MBTR and SOAP.
        n_atoms: Passed to CoulombMatrix as ``n_atoms_max``.
        steve: Passed to ``create`` as ``n_jobs``.
        v: When true, print a message once plotting is done.
        path_output: Directory the figures are written to.
        save: When true, save each figure as a PNG under ``path_output``.
    """
    if descname == "CM":
        # permutation='none' is requested explicitly (the original note says
        # the default is 'sorted_l2').
        desc = CoulombMatrix(n_atoms_max=n_atoms, flatten=False, permutation='none')
        descs = desc.create(structs, n_jobs=steve)

        # Heatmap of the matrix, labelled with the chemical symbols
        sns.heatmap(descs,
                    cmap='Spectral',
                    robust=True,
                    xticklabels=chemsyms,
                    yticklabels=chemsyms)
        plt.title("CM of {}".format(indexs))
        if save:
            plt.savefig("{}/{}_CM.png".format(path_output, indexs[:-4]))

    elif descname == "MBTR":
        desc = MBTR(species=list(set(chemsyms)),
                    k1=mk1,
                    k2=mk2,
                    k3=mk3,
                    periodic=False,
                    normalization="l2_each",
                    flatten=False)
        descs = desc.create(structs, n_jobs=steve)

        # Mapping from an index in the output to the chemical symbol:
        # index -> atomic number -> symbol
        n_elements = len(desc.species)
        imap = desc.index_to_atomic_number
        smap = {
            index: ase.data.chemical_symbols[number]
            for index, number in imap.items()
        }

        # Plot k=1 (axis handling follows fullmetalfelix/ML-CSC-tutorial)
        x1 = desc.get_k1_axis()
        _, ax = plt.subplots()
        for i in range(n_elements):
            plt.plot(x1, descs["k1"][i, :], label="{}".format(smap[i]))
        ax.set_xlabel("Atomic number")
        ax.set_ylabel("k1 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k1 of {}".format(indexs))
        if save:
            plt.savefig("{}/{}_MBTR_k1.png".format(path_output, indexs[:-4]))

        # Plot k=2: every unordered element pair (j >= i)
        x2 = desc.get_k2_axis()
        _, ax = plt.subplots()
        for i in range(n_elements):
            for j in range(i, n_elements):
                plt.plot(x2,
                         descs["k2"][i, j, :],
                         label="{}-{}".format(smap[i], smap[j]))
        ax.set_xlabel("Inverse distance (1/angstrom)")
        ax.set_ylabel("k2 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k2 of {}".format(indexs))
        if save:
            plt.savefig("{}/{}_MBTR_k2.png".format(path_output, indexs[:-4]))

        # Plot k=3: ordered triplets (k >= j >= i); only the triplets whose
        # last element is "S" are drawn (hard-coded filter).
        x3 = desc.get_k3_axis()
        _, ax = plt.subplots()
        for i in range(n_elements):
            for j in range(i, n_elements):
                for k in range(j, n_elements):
                    if smap[k] == "S":
                        plt.plot(x3,
                                 descs["k3"][i, j, k, :],
                                 label="{}-{}-{}".format(
                                     smap[i], smap[j], smap[k]))
        ax.set_xlabel("cos(angle)")
        ax.set_ylabel("k3 values (arbitrary units)")
        plt.legend()
        plt.title("MBTR k3 of {}".format(indexs))
        if save:
            plt.savefig("{}/{}_MBTR_k3.png".format(path_output, indexs[:-4]))

    elif descname == "SOAP":
        # average=False is set here, unlike the averaged variant
        desc = SOAP(species=list(set(chemsyms)),
                    periodic=False,
                    rcut=srcut,
                    nmax=snmax,
                    lmax=slmax,
                    average=False)
        descs = desc.create(structs, n_jobs=steve)

        # One curve per unordered species pair. get_location returns the
        # location of the pair in the output; it is used to slice the first
        # row.
        for combo in combinations_with_replacement(desc.species, 2):
            pairloc = desc.get_location(combo)
            plt.plot(descs[0, pairloc],
                     label="{}-{}".format(combo[0], combo[1]))
        plt.legend()
        plt.xlabel("N of features for an atom pair")
        plt.ylabel("Output value of SOAPs")
        plt.title("SOAPs of {}".format(indexs))
        if save:
            plt.savefig("{}/{}_SOAP.png".format(path_output, indexs[:-4]))

    if v:
        print("🔘 Plotting {} done.".format(descname))
from ase.build import bulk
from ase.build import molecule
import ase.data
from dscribe.descriptors import MBTR

# Species and grid resolution used by the descriptor
atomic_numbers = [1, 8]
n = 100

# MBTR descriptor with only the k=2 term and no weighting
mbtr = MBTR(
    atomic_numbers=atomic_numbers,
    k=2,
    periodic=False,
    grid={"k2": {"min": 0, "max": 1, "n": n, "sigma": 0.1}},
    weighting=None,
)

# The atomic system is an ase.Atoms object
water = molecule("H2O")

# MBTR output for the water molecule
mbtr_water = mbtr.create(water)

print(mbtr_water)
print(mbtr_water.shape)

# Bulk NaCl in the rocksalt structure
nacl = bulk("NaCl", "rocksalt", a=5.64)
import numpy as np
from ase.build import molecule
from dscribe.descriptors import MBTR

# Descriptor setup: k=1 uses the atomic number, k=2 the inverse distance and
# k=3 the cosine of the angle. The k=2 and k=3 terms are exponentially
# weighted.
mbtr = MBTR(
    species=["H", "O"],
    k1={
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 0, "max": 8, "n": 100, "sigma": 0.1},
    },
    k2={
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 1, "n": 100, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    k3={
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 1, "n": 100, "sigma": 0.1},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    periodic=False,
    normalization="l2_each",
)

# The system to describe
water = molecule("H2O")

# MBTR output for the water molecule
mbtr_water = mbtr.create(water)
# formats.
atoms = ase.io.read("nacl.xyz")
atoms.set_cell([5.640200, 5.640200, 5.640200])
atoms.set_initial_charges(atoms.get_atomic_numbers())

# Statistics for a set of ASE Atoms objects can be detected automatically.
# The descriptors typically need some of them, e.g. to define a proper
# zero-padding.
stats = system_stats([atoms])
n_atoms_max = stats["n_atoms_max"]
atomic_numbers = stats["atomic_numbers"]

# The descriptors are created directly from the ASE atoms.
cm = CoulombMatrix(n_atoms_max, permutation="sorted_l2").create(atoms)
sm = SineMatrix(n_atoms_max, permutation="sorted_l2").create(atoms)
mbtr = MBTR(
    atomic_numbers,
    k=[1, 2, 3],
    periodic=True,
    # k2 and k3 use the same exponential weighting settings
    weighting={
        term: {"function": "exponential", "scale": 0.5, "cutoff": 1e-3}
        for term in ("k2", "k3")
    },
).create(atoms)
def test_constructor(self):
    """Exercise valid and invalid MBTR constructor arguments."""
    defaults = (("k1", default_k1), ("k2", default_k2), ("k3", default_k3))

    # Sparse output combined with non-flattened output is rejected.
    with self.assertRaises(ValueError):
        MBTR(
            species=["H"],
            k1=default_k1,
            periodic=False,
            flatten=False,
            sparse=True,
        )

    # A periodic system with a k>1 term is rejected without weighting and
    # accepted once a weighting is given.
    for term, setup in defaults[1:]:
        without_weighting = {
            "geometry": setup["geometry"],
            "grid": setup["grid"],
        }
        with self.assertRaises(ValueError):
            MBTR(species=["H"], periodic=True, **{term: without_weighting})

        with_weighting = {
            "geometry": setup["geometry"],
            "grid": setup["grid"],
            "weighting": {"function": "unity"},
        }
        MBTR(species=["H"], periodic=True, **{term: with_weighting})

    # An unknown weighting function is rejected for every term.
    for term, setup in defaults:
        bad_weighting = {
            "geometry": setup["geometry"],
            "grid": setup["grid"],
            "weighting": {"function": "none"},
        }
        with self.assertRaises(ValueError):
            MBTR(species=[1], periodic=True, **{term: bad_weighting})

    # An unknown geometry function is rejected for every term.
    for term, _ in defaults:
        bad_geometry = {
            "geometry": {"function": "none"},
            "grid": {"min": 0, "max": 1, "n": 10, "sigma": 0.1},
        }
        with self.assertRaises(ValueError):
            MBTR(species=[1], periodic=False, **{term: bad_geometry})

    # The weighting of the default k2 setup needs both its cutoff and scale.
    for missing in ("cutoff", "scale"):
        with self.assertRaises(ValueError):
            setup = copy.deepcopy(default_k2)
            del setup["weighting"][missing]
            MBTR(species=[1], k2=setup, periodic=True)
# MBTR descriptor with all three terms: atomic number (k1), exponentially
# weighted inverse distance (k2) and exponentially weighted cosine of the
# angle (k3). The output is normalized with "n_atoms".
mbtr_constructor = MBTR(
    species=elements,
    k1={
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 0, "max": 17, "n": 100, "sigma": 0.01},
    },
    k2={
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 1, "n": 100, "sigma": 0.01},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    k3={
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 1, "n": 100, "sigma": 0.01},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    periodic=False,
    normalization="n_atoms",
)
def test_number_of_features(self):
    """Check the reported number of features for k1, k1+k2 and k1+k2+k3."""
    n = 100
    atomic_numbers = [1, 8]
    n_elem = len(atomic_numbers)

    k1 = {
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 1, "max": 8, "sigma": 0.1, "n": n},
    }
    k2 = {
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 1/0.7, "sigma": 0.1, "n": n},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-2},
    }
    k3 = {
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 1, "sigma": 0.1, "n": n},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-2},
    }

    # Expected sizes per term: one grid per element, per unordered element
    # pair, and per element times unordered element pair.
    n_pairs = n_elem*(n_elem+1)//2
    size_k1 = n_elem*n
    size_k2 = n_pairs*n
    size_k3 = n_elem*n_pairs*n

    # Each case adds one more term to the previous setup.
    cases = (
        ({"k1": k1}, size_k1),
        ({"k1": k1, "k2": k2}, size_k1 + size_k2),
        ({"k1": k1, "k2": k2, "k3": k3}, size_k1 + size_k2 + size_k3),
    )
    for terms, expected in cases:
        mbtr = MBTR(
            species=atomic_numbers,
            periodic=False,
            flatten=True,
            **terms
        )
        self.assertEqual(mbtr.get_number_of_features(), expected)
# MBTR descriptor with all three terms. The k1 grid range and the sigma of
# every term come from variables defined elsewhere. Only the descriptor is
# constructed here; no output is created (a `.create(ase_train_cv)` call was
# left disabled in the original).
mbtr_desc = MBTR(
    species=atomic_numbers,
    k1={
        "geometry": {"function": "atomic_number"},
        "grid": {
            "min": min_atomic_number,
            "max": max_atomic_number,
            "n": 200,
            "sigma": sigma1,
        },
    },
    k2={
        "geometry": {"function": "inverse_distance"},
        "grid": {"min": 0, "max": 1, "n": 200, "sigma": sigma2},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    k3={
        "geometry": {"function": "cosine"},
        "grid": {"min": -1, "max": 1, "n": 200, "sigma": sigma3},
        "weighting": {"function": "exponential", "scale": 0.5, "cutoff": 1e-3},
    },
    periodic=False,
    normalization="l2_each",
)
def test_properties(self):
    """Check that changing the setup through properties takes effect.

    The species and then the k1, k2 and k3 setups are replaced one at a
    time; each change must alter the created output.
    """
    desc = MBTR(
        k1=default_k1,
        k2=default_k2,
        k3=default_k3,
        periodic=False,
        species=[1, 8],
        sparse=False,
        flatten=True,
    )

    # Changing the species changes the feature count and the output width.
    features_before = desc.get_number_of_features()
    vec1 = desc.create(H2O)
    desc.species = ["C", "H", "O"]
    features_after = desc.get_number_of_features()
    vec2 = desc.create(molecule("CH3OH"))
    self.assertNotEqual(features_before, features_after)
    self.assertNotEqual(vec1.shape[1], vec2.shape[1])

    # New k1 geometry function and grid.
    desc.k1 = {
        "geometry": {"function": "atomic_number"},
        "grid": {"min": 5, "max": 6, "sigma": 0.1, "n": 50},
    }
    vec3 = desc.create(H2O)
    self.assertFalse(np.allclose(vec2, vec3))

    # New k2 geometry function, grid and weighting.
    desc.k2 = {
        "geometry": {"function": "distance"},
        "grid": {"min": 0, "max": 10, "sigma": 0.1, "n": 50},
        "weighting": {"function": "exponential", "scale": 0.6, "cutoff": 1e-2},
    }
    vec4 = desc.create(H2O)
    self.assertFalse(np.allclose(vec3, vec4))

    # New k3 geometry function, grid and weighting.
    desc.k3 = {
        "geometry": {"function": "angle"},
        "grid": {"min": 0, "max": 180, "sigma": 5, "n": 50},
        "weighting": {"function": "exponential", "scale": 0.6, "cutoff": 1e-2},
    }
    vec5 = desc.create(H2O)
    self.assertFalse(np.allclose(vec4, vec5))