def test_model_call(self):
    rep = SphericalInvariants(**self.hypers)
    features = rep.transform([self.frame])

    for target_type in ["Atom", "Structure"]:
        cosine_kernel = Kernel(rep, name="Cosine", target_type=target_type, zeta=2)
        cosine_kernel(features)

    # wrong name
    with self.assertRaises(RuntimeError):
        Kernel(rep, name="WrongName", target_type="Structure", zeta=2)
    with self.assertRaises(RuntimeError):
        Kernel(rep, name="cosine", target_type="Structure", zeta=2)

    # wrong target_type
    with self.assertRaises(RuntimeError):
        Kernel(rep, name="Cosine", target_type="WrongType", zeta=2)
    with self.assertRaises(RuntimeError):
        Kernel(rep, name="Cosine", target_type="structure", zeta=2)
    with self.assertRaises(RuntimeError):
        Kernel(rep, name="Cosine", target_type="atom", zeta=2)

    # wrong zeta
    with self.assertRaises(ValueError):
        Kernel(rep, name="Cosine", target_type="Structure", zeta=2.5)
    with self.assertRaises(ValueError):
        Kernel(rep, name="Cosine", target_type="Structure", zeta=-2)
def test_representation_transform(self):
    rep = SphericalInvariants(**self.hypers)
    features = rep.transform(self.frames)

    test = features.get_features(rep)
    kk_ref = np.dot(test, test.T)

    # test that the feature matrix exported to python in various ways
    # is equivalent
    X_t = features.get_features(rep, self.global_species)
    kk = np.dot(X_t, X_t.T)
    self.assertTrue(np.allclose(kk, kk_ref))

    X_t = features.get_features(rep, self.global_species + [70])
    kk = np.dot(X_t, X_t.T)
    self.assertTrue(np.allclose(kk, kk_ref))

    species = copy(self.global_species)
    species.pop()
    X_t = features.get_features(rep, species)
    kk = np.dot(X_t, X_t.T)
    self.assertFalse(np.allclose(kk, kk_ref))

    X_t = features.get_features_by_species(rep)
    kk = dot(X_t, X_t)
    self.assertTrue(np.allclose(kk, kk_ref))
def compute_representation(feature_hypers, frames, center_atom_id_mask):
    if feature_hypers["feature_type"] == "soap":
        representation = SphericalInvariants(**feature_hypers["feature_parameters"])
        return representation.transform(frames).get_features(representation)
    elif feature_hypers["feature_type"] == "wasserstein":
        return compute_radial_spectrum_wasserstein_features(
            feature_hypers["feature_parameters"], frames)
    elif feature_hypers["feature_type"] == "sorted_distances":
        features = compute_sorted_distances(
            feature_hypers["feature_parameters"], frames, center_atom_id_mask)
        return features
    elif feature_hypers["feature_type"] == "precomputed":
        print("WARNING: for precomputed features we assume that only one "
              "environment per structure was computed.")
        parameters = feature_hypers['feature_parameters']
        nb_envs = sum([len(structure_mask) for structure_mask in center_atom_id_mask])
        if parameters["filetype"] == "npy":
            pathname = f"{FEATURES_ROOT}/{parameters['feature_name']}/{parameters['filename']}"
            return np.load(pathname)[:nb_envs]
        elif parameters["filetype"] == "txt":
            pathname = f"{FEATURES_ROOT}/{parameters['feature_name']}/{parameters['filename']}"
            return np.loadtxt(pathname)[:nb_envs]
        elif parameters["filetype"] == "frame_info":
            return np.array([frame.info[parameters['feature_name']]
                             for frame in frames])[:, np.newaxis][:nb_envs]
        # hardcoded case
        elif parameters["feature_name"] == "displaced_hydrogen_distance":
            return load_hydrogen_distance_dataset(frames)[:nb_envs]
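# Hedged illustration (not from the original sources) of the feature_hypers
# layout that compute_representation above dispatches on; the hyperparameter
# values below are placeholders chosen for the "soap" branch.
feature_hypers = {
    "feature_type": "soap",
    "feature_parameters": {
        "soap_type": "PowerSpectrum",
        "interaction_cutoff": 3.5,
        "max_radial": 6,
        "max_angular": 6,
        "gaussian_sigma_type": "Constant",
        "gaussian_sigma_constant": 0.4,
        "cutoff_smooth_width": 0.5,
    },
}
# one entry per structure listing its selected center indices (here: all atoms)
center_atom_id_mask = [list(range(len(frame))) for frame in frames]
X = compute_representation(feature_hypers, frames, center_atom_id_mask)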
def test_representation_transform(self):
    rep = SphericalInvariants(**self.hypers)
    features = rep.transform([self.frame])

    test = features.get_dense_feature_matrix(rep)
def compute_si_cpp(job):
    # setup input for the script
    data = job.statepoint()
    np.random.seed(data['seed'])
    if data['representation']['coefficient_subselection'] is not None:
        rpr = deepcopy(data['representation'])
        rpr.pop('coefficient_subselection')
        rep = SphericalInvariants(**rpr)
        rep, n_feat = get_randomly_sparsified_soap(data, rep)
    else:
        n_feat = None
        rep = SphericalInvariants(**data['representation'])
    data['calculator'] = rep.hypers
    tojson(job.fn(group['fn_in']), data)

    # look at memory footprint
    p = Popen([group['executable'][data['nl_type']], job.fn(group['fn_in']),
               job.fn(group['fn_out'])], stdout=PIPE, stderr=PIPE)
    max_mem = memory_usage(p, interval=0.1, max_usage=True)

    # look at timings
    p = Popen([group['executable'][data['nl_type']], job.fn(group['fn_in']),
               job.fn(group['fn_out'])], stdout=PIPE, stderr=PIPE)
    # read stderr once; a second read() would only return an empty string
    err = p.stderr.read()
    if err:
        print(err)

    data = fromjson(job.fn(group['fn_out']))
    data = data['results']
    data['n_features'] = n_feat
    data['mem_max'] = max_mem
    data['mem_unit'] = 'MiB'
    tojson(job.fn(group['fn_res']), data)
def get_feature_vector(hypers, frames):
    with ostream_redirect():
        soap = SphericalInvariants(**hypers)
        soap_vectors = soap.transform(frames)
        print('Feature vector size: %.3fMB' %
              (soap.get_num_coefficients() * 8.0 / 1.0e6))
        feature_vector = soap_vectors.get_feature_matrix()
    return feature_vector
def compute_feature_selection(job):
    sp = _decode(job.statepoint())
    st, lg = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[st:st + lg]

    soap = SphericalInvariants(**sp['representation'])
    managers = soap.transform(frames)
    compressor = RandomFilter(soap, **sp['feature_subselection'])
    feature_subselection = compressor.select_and_filter(managers)
    # if sp['feature_subselection']['Nselect'] is None:
    #     feature_subselection['coefficient_subselection'] = None
    tojson(job.fn(group['feature_fn']), feature_subselection)
def benchmark_spherical_representations(frames, optimization_args, radial_basis):
    hypers = {
        'interaction_cutoff': INTERACTION_CUTOFF,
        'max_radial': 8,
        'max_angular': 6,
        'gaussian_sigma_constant': 0.5,
        'gaussian_sigma_type': "Constant",
        'cutoff_smooth_width': 0.5,
        'radial_basis': radial_basis,
        'optimization_args': optimization_args
    }
    print("Timing SphericalExpansion")
    transform_representation(SphericalExpansion(**hypers), frames,
                             nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)

    hypers = {
        'soap_type': "PowerSpectrum",
        'interaction_cutoff': INTERACTION_CUTOFF,
        'max_radial': 8,
        'max_angular': 6,
        'gaussian_sigma_constant': 0.5,
        'gaussian_sigma_type': "Constant",
        'cutoff_smooth_width': 0.5,
        'normalize': False,
        'radial_basis': radial_basis,
        'optimization_args': optimization_args
    }
    print("Timing SphericalInvariants")
    transform_representation(SphericalInvariants(**hypers), frames,
                             nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)
def test_pickle(self):
    rep = SphericalInvariants(**self.hypers)
    cosine_kernel = Kernel(rep, name="Cosine", target_type="Structure", zeta=2)
    serialized = pickle.dumps(cosine_kernel)
    cosine_kernel_ = pickle.loads(serialized)
    self.assertTrue(to_dict(cosine_kernel) == to_dict(cosine_kernel_))
def test_serialization(self):
    rep = SphericalInvariants(**self.hypers)
    rep_dict = to_dict(rep)
    rep_copy = from_dict(rep_dict)
    rep_copy_dict = to_dict(rep_copy)
    self.assertTrue(rep_dict == rep_copy_dict)
def test_serialization(self):
    rep = SphericalInvariants(**self.hypers)
    for target_type in ["Atom", "Structure"]:
        cosine_kernel = Kernel(rep, name="Cosine", target_type=target_type, zeta=2)
        cosine_kernel_dict = to_dict(cosine_kernel)
        cosine_kernel_copy = from_dict(cosine_kernel_dict)
        cosine_kernel_copy_dict = to_dict(cosine_kernel_copy)
        self.assertTrue(cosine_kernel_dict == cosine_kernel_copy_dict)
def abstractSetUp(self):
    example_frames = ase.io.read(
        "reference_data/inputs/small_molecules-20.json", ":")
    global_species = set()
    for frame in example_frames:
        global_species.update(
            [int(sp) for sp in frame.get_atomic_numbers()])

    repr = SOAP(
        max_radial=8,
        max_angular=6,
        interaction_cutoff=4.0,
        cutoff_smooth_width=1.0,
        gaussian_sigma_type="Constant",
        gaussian_sigma_constant=0.3,
        expansion_by_species_method="user defined",
        global_species=sorted(list(global_species)),
    )
    self.managers = repr.transform(example_frames)
    self.example_features = self.managers.get_features(repr)
    self.repr = repr
    self.example_frames = example_frames
def compute_soap(frames, n_FPS=200, soap_hypers=None):
    """Compute the SOAP vectors and do FPS, if desired."""
    soap_default = dict(soap_type="PowerSpectrum",
                        interaction_cutoff=3.5,
                        max_radial=6,
                        max_angular=6,
                        gaussian_sigma_type="Constant",
                        gaussian_sigma_constant=0.4,
                        cutoff_smooth_width=0.5)
    # avoid a mutable default argument; missing hypers fall back to the defaults above
    soap_hypers = dict(soap_hypers) if soap_hypers is not None else {}
    for h in soap_default:
        if h not in soap_hypers:
            soap_hypers[h] = soap_default[h]

    # Compute SOAPs (from librascal tutorial)
    soap = SOAP(**soap_hypers)
    for frame in frames:
        frame.wrap()
    soap_rep = soap.transform(frames)
    X_raw = soap_rep.get_features(soap)
    num_features = X_raw.shape[1]

    if n_FPS is not None:
        print(f"Each SOAP vector contains {num_features} components.\n"
              f"We will use furthest point sampling to generate a subsample "
              f"containing {n_FPS} components of our SOAP vectors.")
        # FPS the components
        col_idxs, col_dist = FPS(X_raw.T, n_FPS)
        X = X_raw[:, col_idxs]
    else:
        X = X_raw
    return X
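# Hedged usage sketch for compute_soap (not part of the original tutorial);
# the input file name is a placeholder, and any hyperparameter not passed in
# soap_hypers falls back to the defaults defined above.
import ase.io

frames = ase.io.read("structures.extxyz", ":")  # placeholder input file
X = compute_soap(frames, n_FPS=200,
                 soap_hypers={"interaction_cutoff": 4.0})
print(X.shape)  # (n_environments, n_FPS)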
class SymmetrizedAtomicDensityCorrelation:
    '''SOAP coefficient vector, as obtained from rascal.'''

    def __init__(self, spherical_hypers, target):
        self.representation = SphericalInvariants(**spherical_hypers)
        assert target in ['Atom', 'Structure']
        self.target = target

    def get_metadata(self):
        return {
            'class': class_name(self),
            'target': self.target,
            'spherical_invariant_hypers': self.representation.hypers,
        }

    def compute(self, frames, center_atom_id_mask):
        if self.target == 'Atom':
            return self.representation.transform(frames).get_features(self.representation)
        elif self.target == 'Structure':
            # average the atomic features over the centers of each structure
            atom_features = self.representation.transform(frames).get_features(self.representation)
            atom_to_struc_idx = np.hstack(
                (0, np.cumsum([len(center_mask) for center_mask in center_atom_id_mask]))
            )
            return np.vstack(
                [np.mean(atom_features[atom_to_struc_idx[i]:atom_to_struc_idx[i + 1]], axis=0)
                 for i in range(len(frames))]
            )
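# Hedged usage sketch for SymmetrizedAtomicDensityCorrelation (not part of the
# original module); the hyperparameter values and frames are placeholders.
spherical_hypers = dict(
    soap_type="PowerSpectrum",
    interaction_cutoff=3.5,
    max_radial=6,
    max_angular=6,
    gaussian_sigma_type="Constant",
    gaussian_sigma_constant=0.4,
    cutoff_smooth_width=0.5,
)
calculator = SymmetrizedAtomicDensityCorrelation(spherical_hypers, target="Structure")
# one entry per structure listing its selected centers (here: all atoms)
center_atom_id_mask = [list(range(len(frame))) for frame in frames]
structure_features = calculator.compute(frames, center_atom_id_mask)
print(calculator.get_metadata())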
def get_randomly_sparsified_soap(data, rep):
    rep_hypers = deepcopy(data['representation'])
    feat_frac = rep_hypers['coefficient_subselection']
    global_species = rep_hypers['global_species']
    feature_mapping = get_feature_index_mapping(rep, global_species)
    ids = np.arange(len(feature_mapping))
    np.random.seed(data['seed'])
    np.random.shuffle(ids)

    sel_feat = {'a': [], 'b': [], 'n1': [], 'n2': [], 'l': []}
    n_feat = int(feat_frac * len(feature_mapping))
    for idx in ids[:n_feat]:
        feat = feature_mapping[idx]
        for k, v in feat.items():
            sel_feat[k].append(int(v))

    rep_hypers['coefficient_subselection'] = sel_feat
    soap = SphericalInvariants(**rep_hypers)
    return soap, n_feat
def compute_knm(job):
    sp = _decode(job.statepoint())
    st, lg = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[st:st + lg]

    X_pseudo = load_obj(job.fn(group['sparse_point_fn']))
    hypers = X_pseudo.representation._get_init_params()
    hypers['compute_gradients'] = job.sp.train_with_grad
    soap = SphericalInvariants(**hypers)
    kernel = Kernel(soap, **sp['kernel'])

    Nstructures = len(frames)
    Ngrad_stride = [0]
    Ngrads = 0
    for frame in frames:
        n_at = len(frame)
        Ngrad_stride.append(n_at * 3)
        Ngrads += n_at * 3
    Ngrad_stride = np.cumsum(Ngrad_stride) + Nstructures

    dump_obj(job.fn(group['kernel_fn']), kernel)

    if job.sp.train_with_grad:
        KNM = np.zeros((Nstructures + Ngrads, X_pseudo.size()))
    else:
        KNM = np.zeros((Nstructures, X_pseudo.size()))

    for i_frame, frame in enumerate(frames):
        en_row, grad_rows = compute(i_frame, frame, soap, kernel, X_pseudo,
                                    grad=job.sp.train_with_grad)
        KNM[i_frame] = en_row
        if job.sp.train_with_grad:
            KNM[Ngrad_stride[i_frame]:Ngrad_stride[i_frame + 1]] = grad_rows

    np.save(job.fn(group['knm_fn']), KNM)
def compute_sparse_point(job):
    sp = _decode(job.statepoint())
    st, lg = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[st:st + lg]

    soap = SphericalInvariants(**sp['representation'])
    managers = soap.transform(frames)
    compressor = RandomFilter(soap, **sp['sparse_point_subselection'])
    compressor.select(managers)

    feature_subselection = fromjson(job.fn(group['feature_fn']))
    sp['representation']['coefficient_subselection'] = feature_subselection[
        'coefficient_subselection']
    soap = SphericalInvariants(**sp['representation'])
    managers = soap.transform(frames)
    compressor._representation = soap
    X_pseudo = compressor.filter(managers)
    dump_obj(job.fn(group['sparse_point_fn']), X_pseudo)
def benchmark_spherical_representations(frames, optimization_args, radial_basis):
    hypers = {
        "interaction_cutoff": INTERACTION_CUTOFF,
        "max_radial": 8,
        "max_angular": 6,
        "gaussian_sigma_constant": 0.5,
        "gaussian_sigma_type": "Constant",
        "cutoff_smooth_width": 0.5,
        "radial_basis": radial_basis,
        "optimization": optimization_args,
    }
    print("Timing SphericalExpansion")
    transform_representation(
        SphericalExpansion(**hypers),
        frames,
        nb_iterations=NB_ITERATIONS_PER_REPRESENTATION,
    )

    hypers = {
        "soap_type": "PowerSpectrum",
        "interaction_cutoff": INTERACTION_CUTOFF,
        "max_radial": 8,
        "max_angular": 6,
        "gaussian_sigma_constant": 0.5,
        "gaussian_sigma_type": "Constant",
        "cutoff_smooth_width": 0.5,
        "normalize": False,
        "radial_basis": radial_basis,
        "optimization": optimization_args,
    }
    print("Timing SphericalInvariants")
    transform_representation(
        SphericalInvariants(**hypers),
        frames,
        nb_iterations=NB_ITERATIONS_PER_REPRESENTATION,
    )
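# Hedged usage sketch (not part of the original benchmark script); the
# optimization dictionary follows the {"Spline": {"accuracy": ...}} form used
# with the "optimization" hyperparameter elsewhere in this collection, and
# frames, INTERACTION_CUTOFF and NB_ITERATIONS_PER_REPRESENTATION are assumed
# to be defined by the surrounding script.
benchmark_spherical_representations(
    frames,
    optimization_args={"Spline": {"accuracy": 1e-8}},
    radial_basis="GTO",
)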
def test_hypers_construction(self):
    """Checks that the manually-constructed and automatic frameworks are consistent."""

    hypers = deepcopy(self.hypers)
    hypers["max_radial"] = self.expanded_max_radial
    spex = SphericalExpansion(**hypers)
    feats = spex.transform(self.frames).get_features_by_species(spex)

    cov = get_radial_basis_covariance(spex, feats)
    p_val, p_vec = get_radial_basis_pca(cov)
    p_mat = get_radial_basis_projections(p_vec, self.hypers["max_radial"])

    # now build the optimized SOAP from these projections
    hypers["max_radial"] = self.hypers["max_radial"]
    hypers["soap_type"] = "PowerSpectrum"
    hypers["optimization"] = {
        "RadialDimReduction": {"projection_matrices": p_mat},
        "Spline": {"accuracy": 1e-8},
    }

    # compute SOAP
    soap_opt = SphericalInvariants(**hypers)
    soap_feats = soap_opt.transform(self.frames).get_features(soap_opt)

    # now we do the same with the compact utility
    hypers = deepcopy(self.hypers)
    hypers["soap_type"] = "PowerSpectrum"
    hypers = get_optimal_radial_basis_hypers(
        hypers, self.frames, expanded_max_radial=self.expanded_max_radial)
    soap_opt_2 = SphericalInvariants(**hypers)
    soap_feats_2 = soap_opt_2.transform(self.frames).get_features(soap_opt_2)

    self.assertTrue(np.allclose(soap_feats, soap_feats_2))
mask_center_atoms_by_species(molecule, species_select=['C', ])
# Also works by atomic number
# mask_center_atoms_by_species(molecule, species_select=[6, ])

hypers = {
    'interaction_cutoff': 5.0,
    'cutoff_smooth_width': 0.5,
    'max_radial': 8,
    'max_angular': 6,
    'gaussian_sigma_type': "Constant",
    'gaussian_sigma_constant': 0.3
}
representation = SphericalInvariants(**hypers)

atoms_transformed = representation.transform(molecules)
print("Number of feature vectors computed: {:d}".format(
    atoms_transformed.get_features(representation).shape[0]))

print("Now masking out the first 5 atoms of each molecule.")
n_remaining_centers = sum(
    np.sum(mol.get_atomic_numbers()[5:] == 6) for mol in molecules)
print("Number of centres remaining: {:d}".format(n_remaining_centers))

for molecule in molecules:
    mask_center_atoms_by_id(molecule, id_blacklist=np.arange(5))
atoms_transformed = representation.transform(molecules)
print("Number of feature vectors computed: {:d}".format(
    atoms_transformed.get_features(representation).shape[0]))
def dump_reference_json():
    sys.path.insert(0, join(root, "build/"))
    sys.path.insert(0, join(root, "tests/"))
    from rascal.representations import SphericalInvariants
    from ase.io import read

    np.random.seed(10)

    fns = [
        "diamond_2atom_distorted.json",
        "CaCrP2O7_mvc-11955_symmetrized.json",
        "methane.json",
    ]
    soap_types = ["PowerSpectrum"]
    Nselects = ["all", "all_random", "8_random"]

    sparsification_inputs = []
    for fn, soap_type, Nselect in product(fns, soap_types, Nselects):
        frames = read(join(inputs_path, fn), ":")
        hypers = dict(
            soap_type=soap_type,
            interaction_cutoff=3.5,
            max_radial=2,
            max_angular=2,
            gaussian_sigma_constant=0.4,
            gaussian_sigma_type="Constant",
            cutoff_smooth_width=0.5,
            normalize=False,
            compute_gradients=True,
            expansion_by_species_method="structure wise",
        )
        soap = SphericalInvariants(**hypers)
        managers = soap.transform(frames)

        hyp = deepcopy(hypers)

        # select some features from the possible set
        mapping = soap.get_feature_index_mapping(managers)
        selected_features = {key: [] for key in mapping[0].keys()}
        ids = np.array([key for key in mapping.keys()])
        if Nselect == "all":
            pass
        elif Nselect == "all_random":
            np.random.shuffle(ids)
        elif Nselect == "8_random":
            np.random.shuffle(ids)
            ids = ids[:8]
        else:
            raise NotImplementedError()

        for idx in ids:
            coef_idx = mapping[idx]
            for key in selected_features.keys():
                selected_features[key].append(int(coef_idx[key]))
        # selected_features_global_ids is important for the tests
        selected_features["selected_features_global_ids"] = ids.tolist()

        mapp = dict(coefficient_subselection=selected_features)
        hyp.update(mapp)

        soap_s = SphericalInvariants(**hyp)
        managers_s = soap_s.transform(frames)

        sparsification_inputs.append(
            dict(
                hypers=dict(
                    rep=soap.hypers,
                    rep_sparse=soap_s.hypers,
                    adaptors=json.loads(managers_s.managers.get_parameters()),
                ),
                filename=join(read_inputs_path, fn),
                Nselect=Nselect,
            ))

    fn_out = join(root, dump_path, "sparsification_inputs.json")
    print(fn_out)
    with open(fn_out, "w") as f:
        sparsification_inputs_pretty = prettyjson(
            sparsification_inputs, indent=2, maxlinelength=80)
        f.write(sparsification_inputs_pretty)
def dump_reference_json():
    import ubjson
    import os
    from copy import copy
    sys.path.insert(0, os.path.join(root, 'build/'))
    sys.path.insert(0, os.path.join(root, 'tests/'))

    cutoffs = [3.5]
    gaussian_sigmas = [0.5]
    max_radials = [6]
    max_angulars = [6]
    soap_types = ["RadialSpectrum", "PowerSpectrum"]

    fn = os.path.join(inputs_path, "small_molecules-20.json")
    fn_to_write = os.path.join(
        'reference_data', "inputs", "small_molecules-20.json")
    start = 0
    length = 5
    representations = ['spherical_invariants']
    kernel_names = ['Cosine']
    target_types = ['Structure', 'Atom']
    dependant_args = dict(Cosine=[dict(zeta=1), dict(zeta=2), dict(zeta=4)])

    data = dict(filename=fn_to_write,
                start=start,
                length=length,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                kernel_names=kernel_names,
                target_types=target_types,
                dependant_args=dependant_args,
                rep_info=dict(spherical_invariants=[]))

    frames = read(fn, '{}:{}'.format(start, start + length))
    for representation_name in representations:
        for cutoff in cutoffs:
            print(fn, cutoff)
            data['rep_info'][representation_name].append([])
            for kernel_name in kernel_names:
                for target_type in target_types:
                    for kwargs in dependant_args[kernel_name]:
                        for soap_type in soap_types:
                            for gaussian_sigma in gaussian_sigmas:
                                for max_radial in max_radials:
                                    for max_angular in max_angulars:
                                        if 'RadialSpectrum' == soap_type:
                                            max_angular = 0

                                        hypers = {
                                            "interaction_cutoff": cutoff,
                                            "cutoff_smooth_width": 0.5,
                                            "max_radial": max_radial,
                                            "max_angular": max_angular,
                                            "gaussian_sigma_type": "Constant",
                                            "gaussian_sigma_constant": gaussian_sigma,
                                            "soap_type": soap_type,
                                            "cutoff_function_type": "ShiftedCosine",
                                            "normalize": True,
                                            "radial_basis": "GTO"}
                                        soap = SphericalInvariants(**hypers)
                                        soap_vectors = soap.transform(frames)
                                        hypers_kernel = dict(name=kernel_name,
                                                             target_type=target_type)
                                        hypers_kernel.update(**kwargs)
                                        kernel = Kernel(soap, **hypers_kernel)
                                        kk = kernel(soap_vectors)
                                        # x = get_spectrum(hypers, frames)
                                        for aa in soap.nl_options:
                                            aa['initialization_arguments'] = aa['args']

                                        data['rep_info'][representation_name][-1].append(
                                            dict(kernel_matrix=kk.tolist(),
                                                 hypers_rep=copy(soap.hypers),
                                                 hypers_manager=copy(soap.nl_options),
                                                 hypers_kernel=copy(hypers_kernel)))

    with open(os.path.join(root, dump_path, "kernel_reference.ubjson"), 'wb') as f:
        ubjson.dump(data, f)
def dump_reference_json():
    import ubjson
    import os
    from copy import copy
    path = '../'
    sys.path.insert(0, os.path.join(path, 'build/'))
    sys.path.insert(0, os.path.join(path, 'tests/'))

    cutoffs = [2, 3]
    gaussian_sigmas = [0.2, 0.5]
    max_radials = [4, 10]
    max_angulars = [3, 6]
    soap_types = ["RadialSpectrum", "PowerSpectrum", "BiSpectrum"]
    inversion_symmetry = False

    fns = [
        os.path.join(
            path, "tests/reference_data/CaCrP2O7_mvc-11955_symmetrized.json"),
        os.path.join(path, "tests/reference_data/small_molecule.json")
    ]
    fns_to_write = [
        "reference_data/CaCrP2O7_mvc-11955_symmetrized.json",
        "reference_data/small_molecule.json",
    ]

    data = dict(filenames=fns_to_write,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                rep_info=[])

    for fn in fns:
        frames = [json2ase(load_json(fn))]
        for cutoff in cutoffs:
            print(fn, cutoff)
            data['rep_info'].append([])
            for soap_type in soap_types:
                for gaussian_sigma in gaussian_sigmas:
                    for max_radial in max_radials:
                        for max_angular in max_angulars:
                            if 'RadialSpectrum' == soap_type:
                                max_angular = 0
                            if "BiSpectrum" == soap_type:
                                max_radial = 2
                                max_angular = 1
                                inversion_symmetry = True

                            hypers = {
                                "interaction_cutoff": cutoff,
                                "cutoff_smooth_width": 0.5,
                                "max_radial": max_radial,
                                "max_angular": max_angular,
                                "gaussian_sigma_type": "Constant",
                                "normalize": True,
                                "cutoff_function_type": "Cosine",
                                "radial_basis": "GTO",
                                "gaussian_sigma_constant": gaussian_sigma,
                                "soap_type": soap_type,
                                "inversion_symmetry": inversion_symmetry,
                            }
                            soap = SphericalInvariants(**hypers)
                            soap_vectors = soap.transform(frames)
                            x = soap_vectors.get_feature_matrix()
                            # x = get_feature_vector(hypers, frames)
                            data['rep_info'][-1].append(
                                dict(feature_matrix=x.tolist(),
                                     hypers=copy(soap.hypers)))

    with open(
            path + "tests/reference_data/spherical_invariants_reference.ubjson",
            'wb') as f:
        ubjson.dump(data, f)
class IndexConversionTest(unittest.TestCase):
    """Test index conversion utilities for filtering"""

    def setUp(self):
        self.example_frames = ase.io.read(
            "reference_data/inputs/small_molecules-20.json", ":5")
        global_species = set()
        for frame in self.example_frames:
            global_species.update(
                [int(sp) for sp in frame.get_atomic_numbers()])
        self.global_species = global_species
        self.repr = SOAP(
            max_radial=3,
            max_angular=0,
            soap_type="RadialSpectrum",
            interaction_cutoff=4.0,
            cutoff_smooth_width=1.0,
            gaussian_sigma_type="Constant",
            gaussian_sigma_constant=0.3,
            expansion_by_species_method="user defined",
            global_species=sorted(list(global_species)),
        )
        self.managers = self.repr.transform(self.example_frames)
        self.example_features = self.managers.get_features(self.repr)

    def test_split_by_sp(self):
        """Test the feature matrix species splitter"""
        # Let's try a basic example matrix first
        X = np.concatenate([
            frame.get_atomic_numbers() for frame in self.example_frames
        ])[:, np.newaxis]
        sps = list(self.global_species)
        X_by_sp = filter._split_feature_matrix_by_species(
            self.managers, X, sps)
        for sp in sps:
            self.assertTrue(np.all(X_by_sp[sp] == sp))
        # Now with the actual feature matrix
        X_by_sp = filter._split_feature_matrix_by_species(
            self.managers, self.example_features, sps)
        X_by_sp_manual = {}
        atoms_species = X.flatten()
        for sp in sps:
            X_by_sp_manual[sp] = self.example_features[atoms_species == sp]
            self.assertTrue(np.all(X_by_sp_manual[sp] == X_by_sp[sp]))

    def test_indices_global(self):
        """Test the global-to-perstructure index transformation"""
        example_idces_global = [0, 2, 16, 17, 82]
        example_idces_perstructure = filter._indices_manager_to_perstructure(
            self.managers, example_idces_global)
        self.assertEqual(example_idces_perstructure,
                         [[0, 2, 16], [0], [], [], [19]])
        # It should also work with an ASE list of atoms
        example_idces_perstructure = filter._indices_manager_to_perstructure(
            self.example_frames, example_idces_global)
        self.assertEqual(example_idces_perstructure,
                         [[0, 2, 16], [0], [], [], [19]])

    def test_indices_global_out_of_range(self):
        """Test the index transformer with out-of-range indices"""
        with self.assertRaisesRegex(
                ValueError,
                rf"Selected index\(es\): \[83\] out of range"):
            filter._indices_manager_to_perstructure(
                self.managers,
                [
                    83,
                ],
            )
        bad_indices = list(range(83, 90))
        bad_indices_str = "83, 84, ..., 88, 89"
        with self.assertRaisesRegex(
                ValueError,
                rf"Selected index\(es\): \[{bad_indices_str}\] out of range"):
            filter._indices_manager_to_perstructure(self.managers, bad_indices)

    def test_indices_perspecies(self):
        """Test the per-species, global-to-perstructure index transformation"""
        example_idces_perspecies = {
            1: [0, 1, 20],
            8: [1, 2, 5],
            6: [],
            7: [0, 4]
        }
        # The order of the species should not matter;
        # the output is always sorted by species
        sps = [7, 6, 8, 1]
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies, sps)
        self.assertEqual(perspecies_idces,
                         [[8, 9, 4], [], [9, 7, 0, 8], [], [0]])
        # Try it with an iterator (instead of a list) of species
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies,
            example_idces_perspecies.keys())
        self.assertEqual(perspecies_idces,
                         [[8, 9, 4], [], [9, 7, 0, 8], [], [0]])
        # And a set
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies, set(sps))
        self.assertEqual(perspecies_idces,
                         [[8, 9, 4], [], [9, 7, 0, 8], [], [0]])

    def test_indices_perspecies_out_of_range(self):
        """Test the per-species index transformer with out-of-range indices"""
        bad_idces_perspecies = {1: [], 8: [6], 7: [], 6: []}
        sps = [1, 6, 7, 8]
        with self.assertRaisesRegex(
                ValueError,
                r"Selected index\(es\): \[6\] for species 8 out of range"):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, bad_idces_perspecies, sps)
        nonexistent_idces_perspecies = {1: [], 8: [], 7: [], 6: [], 12: [0]}
        sps = [1, 6, 7, 8, 12]
        with self.assertRaisesRegex(
                ValueError,
                r"Selected index\(es\): \[0\] for species 12 out of range "
                r"\(species does not appear to be present\)",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, nonexistent_idces_perspecies, sps)

    def test_indices_bad_species(self):
        """Test the global-to-perstructure selection with bad species lists"""
        # Missing species
        sps_bad = [1, 6, 8]
        sps_bad_re_str = r"\[1, 6, 8\]"
        with self.assertRaisesRegex(
                ValueError,
                f"^Atom of type 7 found but was not listed in sps: {sps_bad_re_str}$",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, {sp: [] for sp in sps_bad}, sps_bad)
        # Duplicated species
        sps_bad = [1, 6, 6, 7, 8]
        with self.assertRaisesRegex(
                ValueError,
                rf"^List of species contains duplicated entries: \[1, 6, 6, 7, 8\]",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, {sp: [0] for sp in sps_bad}, sps_bad)
def test_pickle(self):
    rep = SphericalInvariants(**self.hypers)
    serialized = pickle.dumps(rep)
    rep_ = pickle.loads(serialized)
    self.assertTrue(to_dict(rep) == to_dict(rep_))
def test_representation_gradient(self):
    """
    Test the get_features and get_features_gradient functions by computing
    the linear sparse kernel matrix and checking that the exported features
    lead to the same kernel matrix as the reference method.
    """
    hypers = deepcopy(self.hypers)
    hypers["compute_gradients"] = True
    rep = SphericalInvariants(**hypers)

    features = rep.transform(self.frames)

    n_sparses = {1: 1, 6: 1, 8: 1, 14: 1, 15: 1, 20: 1, 24: 1}

    compressor = FPSFilter(rep, n_sparses, act_on="sample per species")
    X_pseudo = compressor.select_and_filter(features)

    xs = X_pseudo.get_features()
    n_sparse, n_feat = xs.shape

    masks = {sp: np.zeros(n_sparse, dtype=bool) for sp in n_sparses}
    ii = 0
    for sp, mask in masks.items():
        mask[ii:ii + n_sparses[sp]] = 1
        ii = ii + n_sparses[sp]

    zeta = 1
    kernel = Kernel(rep, name="GAP", zeta=zeta, target_type="Structure",
                    kernel_type="Sparse")

    ij = features.get_gradients_info()
    n_atoms = len(np.unique(ij[:, 1]))
    n_neigh = ij.shape[0]

    KNM_ref = kernel(features, X_pseudo, (False, False))
    X = features.get_features(rep).reshape((n_atoms, n_feat))
    KNM = np.zeros((len(self.frames), n_sparse))
    ii = 0
    for iff, frame in enumerate(features):
        for at in frame:
            sp = at.atom_type
            KNM[iff, masks[sp]] += np.dot(X[ii], xs[masks[sp]].T)
            ii += 1
    self.assertTrue(np.allclose(KNM_ref, KNM))

    KNM_ref = kernel(features, X_pseudo, (True, False))
    X_der = features.get_features_gradient(rep).reshape((n_neigh, 3, n_feat))
    KNM = np.zeros((n_atoms, 3, n_sparse))
    for ii, (i_frame, i, j, i_sp, j_sp) in enumerate(ij):
        sp = i_sp
        KNM[j, 0, masks[sp]] += np.dot(X_der[ii, 0], xs[masks[sp]].T)
        KNM[j, 1, masks[sp]] += np.dot(X_der[ii, 1], xs[masks[sp]].T)
        KNM[j, 2, masks[sp]] += np.dot(X_der[ii, 2], xs[masks[sp]].T)
    KNM = KNM.reshape((-1, n_sparse))
    self.assertTrue(np.allclose(KNM_ref, KNM))
def compute_benchmark(job):
    from rascal.models.krr import compute_sparse_kernel_gradients
    sp = _decode(job.statepoint())
    st, lg = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[st:st + lg]

    model = load_obj(job.fn(group['model_fn']))
    soap = model.get_representation_calculator()

    grads_timing = job.sp.grads_timing

    hypers = soap._get_init_params()
    hypers['compute_gradients'] = grads_timing
    soap = SphericalInvariants(**hypers)

    rc = sp['representation']['interaction_cutoff']
    nl_options = [
        dict(name='centers', args=[]),
        dict(name='neighbourlist', args=dict(cutoff=rc)),
        # dict(name='halflist', args=dict()),
        dict(name="centercontribution", args=dict()),
        dict(name='strict', args=dict(cutoff=rc))
    ]
    kernel = Kernel(soap, **sp['kernel'])

    N_ITERATIONS = sp['N_ITERATIONS']
    if grads_timing:
        tags = ['NL', 'rep with grad', 'pred energy', 'pred forces']
    else:
        tags = ['NL', 'rep', 'pred energy']
    timers = {k: Timer(tag=k, logger=None) for k in tags}

    if job.sp.name != 'qm9':
        frames = [
            make_supercell(frames[0], job.sp.n_replication * np.eye(3),
                           wrap=True, tol=1e-11)
        ]
    else:
        frames = frames[:100]

    if grads_timing:
        for ii in range(N_ITERATIONS):
            with timers['NL']:
                managers = AtomsList(frames, nl_options)
            sleep(0.1)
            with timers['rep with grad']:
                managers = soap.transform(managers)
            sleep(0.1)
            Y0 = model._get_property_baseline(managers)
            with timers['pred energy']:
                KNM = kernel(managers, model.X_train, (False, False))
                # energy prediction evaluated only for timing; result discarded
                Y0 + np.dot(KNM, model.weights).reshape((-1))
            sleep(0.1)
            with timers['pred forces']:
                rep = soap._representation
                forces = -compute_sparse_kernel_gradients(
                    rep, model.kernel._kernel, managers.managers,
                    model.X_train._sparse_points,
                    model.weights.reshape((1, -1)))
            sleep(0.1)
            managers, KNM = [], []
            del managers, KNM
            sleep(0.3)
    else:
        for ii in range(N_ITERATIONS):
            with timers['NL']:
                managers = AtomsList(frames, nl_options)
            sleep(0.1)
            with timers['rep']:
                managers = soap.transform(managers)
            sleep(0.1)
            Y0 = model._get_property_baseline(managers)
            with timers['pred energy']:
                KNM = kernel(managers, model.X_train, (False, False))
                # energy prediction evaluated only for timing; result discarded
                Y0 + np.dot(KNM, model.weights).reshape((-1))
            sleep(0.1)
            managers, KNM = [], []
            del managers, KNM
            sleep(0.3)

    n_atoms = 0
    for frame in frames:
        n_atoms += len(frame)

    timings = []
    for tag in tags:
        data = timers[tag].dumps()
        data.update({'name': job.sp.name, 'n_atoms': n_atoms})
        timings.append(data)
    tojson(job.fn(group['benchmark_fn']), timings)
def compute_squared_radial_spectrum_wasserstein_distance(
        feature_paramaters, frames, nb_grid_points=200):
    if feature_paramaters["feature_parameters"]["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can only be computed for soap_type="RadialSpectrum".'
        )
    if feature_paramaters["feature_parameters"]["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can only be computed for radial_basis="DVR".'
        )
    nb_basis_functions = feature_paramaters["feature_parameters"]["max_radial"]
    feature_paramaters["feature_parameters"]["max_radial"] = nb_grid_points
    normalize_wasserstein_features = feature_paramaters["feature_parameters"]["normalize"]
    feature_paramaters["feature_parameters"]["normalize"] = False
    cutoff = feature_paramaters["feature_parameters"]["interaction_cutoff"]

    # compute soap representation for interpolation
    representation = SphericalInvariants(
        **feature_paramaters["feature_parameters"])
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses Gaussian quadrature points as basis functions; we reproduce the
    # original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    density_grid = np.hstack((0, density_grid))
    densities /= np.sqrt(density_weights)
    cdf = np.cumsum(densities, axis=1)

    # Gaussian quadrature points as grid
    distance_parameters = feature_paramaters["hilbert_space_parameters"]["distance_parameters"]
    if distance_parameters["grid_type"] == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    elif distance_parameters["grid_type"] == "equispaced":
        interp_grid = np.linspace(0, 1, nb_basis_functions)
    else:
        raise ValueError("The wasserstein grid_type="
                         + distance_parameters["grid_type"]
                         + " is not known.")

    # normalize nonzero environments
    nonzero_mask = cdf[:, -1] != 0
    # insert the zero-probability point at the beginning to help the interpolation
    cdf = np.concatenate((np.zeros((cdf.shape[0], 1)), cdf), axis=1)
    dist = np.zeros((nb_envs, nb_envs))
    if distance_parameters["delta_normalization"]:
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points + 1)
        # potential bug when species are present
        for i in range(nb_envs):  # subset of nb_envs*nb_species
            for j in range(nb_envs):  # subset of nb_envs*nb_species
                for sp in range(nb_species):
                    max_norm = max(cdf[i, sp, -1], cdf[j, sp, -1])
                    cdf_i = np.copy(cdf[i, sp, :])
                    cdf_j = np.copy(cdf[j, sp, :])
                    cdf_i[-1] = max_norm
                    cdf_j[-1] = max_norm
                    cdf_i /= max_norm
                    cdf_j /= max_norm
                    interpolator_i = interp1d(cdf_i, density_grid, assume_sorted=True)
                    interpolator_j = interp1d(cdf_j, density_grid, assume_sorted=True)
                    wasserstein_features_i = interpolator_i(interp_grid)
                    wasserstein_features_j = interpolator_j(interp_grid)
                    dist[i, j] += np.sum(
                        (wasserstein_features_i - wasserstein_features_j) ** 2)
        return dist
    else:
        cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]
        wasserstein_features = np.zeros((nb_envs * nb_species, nb_basis_functions))
        for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
            interpolator = interp1d(cdf[i, :], density_grid, assume_sorted=True)
            wasserstein_features[i, :] = interpolator(interp_grid)
        if distance_parameters["grid_type"] == "gaussian_quadrature":
            wasserstein_features *= np.sqrt(interp_weights)
        wasserstein_features = wasserstein_features.reshape(
            nb_envs, nb_species * nb_basis_functions)
        if normalize_wasserstein_features:
            wasserstein_features /= np.linalg.norm(
                wasserstein_features, axis=1)[:, np.newaxis]
        return squareform(pdist(wasserstein_features))
def compute_radial_spectrum_wasserstein_features(feature_paramaters, frames):
    """Compute Wasserstein features from the radial spectrum."""
    if feature_paramaters["soap_parameters"]["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can only be computed for soap_type="RadialSpectrum".'
        )
    if feature_paramaters["soap_parameters"]["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can only be computed for radial_basis="DVR".'
        )
    nb_basis_functions = feature_paramaters["nb_basis_functions"]
    nb_grid_points = feature_paramaters["soap_parameters"]["max_radial"]
    normalize_wasserstein_features = feature_paramaters["soap_parameters"]["normalize"]
    feature_paramaters["soap_parameters"]["normalize"] = False
    cutoff = feature_paramaters["soap_parameters"]["interaction_cutoff"]

    # compute soap representation for interpolation
    representation = SphericalInvariants(**feature_paramaters["soap_parameters"])
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses Gaussian quadrature points as basis functions; we reproduce the
    # original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    densities /= np.sqrt(density_weights)
    cdf = scipy.integrate.cumtrapz(densities, density_grid)
    # insert the zero-probability point at the beginning to help the interpolation
    cdf = np.hstack((np.zeros((cdf.shape[0], 1)), cdf))

    if feature_paramaters["delta_normalization"]:
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points)
        delta_sigma = feature_paramaters["delta_sigma"]
        delta_offset_percentage = feature_paramaters["delta_offset_percentage"]
        if delta_sigma is None:
            for i in range(nb_species):
                max_norm = np.max(cdf[:, i, -1])
                max_norm += delta_offset_percentage * max_norm
                cdf[:, i, -1] += max_norm - cdf[:, i, -1]
        else:
            for i in range(nb_species):
                cdf[:, i, :] = bump_function(density_grid, cdf[:, i, :],
                                             cutoff, delta_sigma)
        cdf = cdf.reshape(nb_envs * nb_species, nb_grid_points)

    # normalize nonzero environments
    nonzero_mask = cdf[:, -1] != 0
    cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]

    # Gaussian quadrature points as grid
    if feature_paramaters["grid_type"] == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    elif feature_paramaters["grid_type"] == "equispaced":
        interp_grid = np.linspace(0, 1, nb_basis_functions)
    else:
        raise ValueError("The wasserstein grid_type="
                         + feature_paramaters["grid_type"] + " is not known.")

    wasserstein_features = np.zeros((nb_envs * nb_species, nb_basis_functions))
    # add jitter for uniqueness
    jitter = np.finfo(0.1).tiny * np.arange(cdf.shape[1])
    cdf += jitter[np.newaxis, :]
    for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
        interpolator = interp1d(cdf[i], density_grid, assume_sorted=True,
                                kind='linear')
        wasserstein_features[i, :] = interpolator(interp_grid)

    # delta normalization 2 sets delta areas to 0 so they cannot be used as features
    if feature_paramaters["delta_normalization"] == 2:
        wasserstein_features[cutoff - 1e-3 <= wasserstein_features] = 0

    if feature_paramaters["grid_type"] == "gaussian_quadrature":
        wasserstein_features *= np.sqrt(interp_weights)

    wasserstein_features = wasserstein_features.reshape(
        nb_envs, nb_species * nb_basis_functions)
    if normalize_wasserstein_features:
        wasserstein_features /= np.linalg.norm(
            wasserstein_features, axis=1)[:, np.newaxis]
    return wasserstein_features
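# Hedged illustration (not from the original source) of the feature_paramaters
# dictionary consumed by compute_radial_spectrum_wasserstein_features above;
# the keys mirror the accesses in the function, the values are placeholders.
feature_paramaters = {
    "nb_basis_functions": 32,
    "grid_type": "gaussian_quadrature",   # or "equispaced"
    "delta_normalization": False,
    "delta_sigma": None,
    "delta_offset_percentage": 0.0,
    "soap_parameters": {
        "soap_type": "RadialSpectrum",
        "radial_basis": "DVR",
        "max_radial": 200,                 # number of DVR grid points
        "max_angular": 0,
        "interaction_cutoff": 4.0,
        "cutoff_smooth_width": 0.5,
        "gaussian_sigma_type": "Constant",
        "gaussian_sigma_constant": 0.3,
        "normalize": True,
    },
}
wasserstein_features = compute_radial_spectrum_wasserstein_features(
    feature_paramaters, frames)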
def __init__(self, spherical_hypers, target):
    self.representation = SphericalInvariants(**spherical_hypers)
    assert target in ['Atom', 'Structure']
    self.target = target