コード例 #1
0
    def test_model_call(self):
        """Check that ``Kernel`` evaluates for valid hypers and rejects bad ones.

        A cosine kernel is built and called for both target types, then a
        table of invalid keyword combinations is checked against the
        exception type the constructor is expected to raise.
        """
        rep = SphericalInvariants(**self.hypers)
        features = rep.transform([self.frame])

        # valid configurations: constructing and calling must not raise
        for target_type in ("Atom", "Structure"):
            kernel = Kernel(rep, name="Cosine", target_type=target_type, zeta=2)
            kernel(features)

        # (expected exception, keyword arguments handed to Kernel)
        invalid_cases = [
            # wrong name (including wrong capitalisation)
            (RuntimeError,
             dict(name="WrongName", target_type="Structure", zeta=2)),
            (RuntimeError,
             dict(name="cosine", target_type="Structure", zeta=2)),
            # wrong target_type (including wrong capitalisation)
            (RuntimeError,
             dict(name="Cosine", target_type="WrongType", zeta=2)),
            (RuntimeError,
             dict(name="Cosine", target_type="structure", zeta=2)),
            (RuntimeError,
             dict(name="Cosine", target_type="atom", zeta=2)),
            # wrong zeta (non-integer, negative)
            (ValueError,
             dict(name="Cosine", target_type="Structure", zeta=2.5)),
            (ValueError,
             dict(name="Cosine", target_type="Structure", zeta=-2)),
        ]
        for expected_error, kernel_kwargs in invalid_cases:
            with self.assertRaises(expected_error):
                Kernel(rep, **kernel_kwargs)
コード例 #2
0
    def test_representation_transform(self):
        """Check that the feature export variants yield consistent Gram matrices.

        The Gram matrix of the default export is the reference. Exports with
        the full species list (optionally with an extra species) must match
        it, an export with one species removed must differ, and the
        by-species export combined through ``dot`` must match again.
        """
        rep = SphericalInvariants(**self.hypers)
        managers = rep.transform(self.frames)

        def gram(matrix):
            return np.dot(matrix, matrix.T)

        kk_ref = gram(managers.get_features(rep))

        # same species list as the one the test case holds
        exported = managers.get_features(rep, self.global_species)
        self.assertTrue(np.allclose(gram(exported), kk_ref))

        # one additional species (70) appended to the list
        exported = managers.get_features(rep, self.global_species + [70])
        self.assertTrue(np.allclose(gram(exported), kk_ref))

        # dropping the last species must change the result
        reduced_species = copy(self.global_species)
        reduced_species.pop()
        exported = managers.get_features(rep, reduced_species)
        self.assertFalse(np.allclose(gram(exported), kk_ref))

        # by-species export, combined with the `dot` helper
        by_species = managers.get_features_by_species(rep)
        self.assertTrue(np.allclose(dot(by_species, by_species), kk_ref))
コード例 #3
0
def compute_representation(feature_hypers, frames, center_atom_id_mask):
    """Compute or load the feature matrix selected by ``feature_hypers``.

    Parameters
    ----------
    feature_hypers : dict
        Must contain ``"feature_type"`` (one of ``"soap"``, ``"wasserstein"``,
        ``"sorted_distances"``, ``"precomputed"``) and ``"feature_parameters"``.
    frames : sequence
        Structures handed to the selected feature backend.
    center_atom_id_mask : sequence of sequences
        One mask per structure; its total length is the number of
        environments kept when loading precomputed features.

    Returns
    -------
    The features produced by the selected backend.

    Raises
    ------
    ValueError
        If the feature type, or the source of precomputed features, is not
        one of the supported options (previously ``None`` was returned
        silently, which only failed later in the caller).
    """
    feature_type = feature_hypers["feature_type"]

    if feature_type == "soap":
        representation = SphericalInvariants(**feature_hypers["feature_parameters"])
        return representation.transform(frames).get_features(representation)

    if feature_type == "wasserstein":
        return compute_radial_spectrum_wasserstein_features(feature_hypers["feature_parameters"], frames)

    if feature_type == "sorted_distances":
        return compute_sorted_distances(feature_hypers["feature_parameters"], frames, center_atom_id_mask)

    if feature_type == "precomputed":
        print("WARNING we assume for the precomputed features that only one environment in each structure was computed.")
        parameters = feature_hypers['feature_parameters']
        nb_envs = sum(len(structure_mask) for structure_mask in center_atom_id_mask)
        filetype = parameters["filetype"]

        if filetype in ("npy", "txt"):
            pathname = f"{FEATURES_ROOT}/{parameters['feature_name']}/{parameters['filename']}"
            loader = np.load if filetype == "npy" else np.loadtxt
            return loader(pathname)[:nb_envs]

        if filetype == "frame_info":
            # one scalar per frame, reshaped into a single-column matrix
            return np.array([frame.info[parameters['feature_name']] for frame in frames])[:, np.newaxis][:nb_envs]

        # hardcoded case
        if parameters["feature_name"] == "displaced_hydrogen_distance":
            return load_hydrogen_distance_dataset(frames)[:nb_envs]

        raise ValueError(
            f"Unsupported precomputed feature source: filetype={filetype!r}, "
            f"feature_name={parameters['feature_name']!r}")

    raise ValueError(f"Unknown feature_type {feature_type!r}")
コード例 #4
0
    def test_representation_transform(self):
        """Smoke test: transform one frame and export the dense feature matrix."""
        calculator = SphericalInvariants(**self.hypers)
        managers = calculator.transform([self.frame])

        # the matrix is only requested, not inspected, in the visible code
        dense_matrix = managers.get_dense_feature_matrix(calculator)
コード例 #5
0
def compute_si_cpp(job):
    """Run the external executable for ``job`` and store timing/memory results.

    The job statepoint is extended with the representation hypers and written
    to the job's input file. The executable selected by ``data['nl_type']`` is
    then run twice: once under ``memory_usage`` to record the peak memory and
    once to produce the output file that is read back. The ``'results'``
    entry of that output, extended with the number of selected features and
    the memory figures, is written to the job's result file.
    """
    # setup input for the script
    data = job.statepoint()
    np.random.seed(data['seed'])

    if data['representation']['coefficient_subselection'] is not None:
        rpr = deepcopy(data['representation'])
        rpr.pop('coefficient_subselection')
        rep = SphericalInvariants(**rpr)
        rep, n_feat = get_randomly_sparsified_soap(data, rep)
    else:
        n_feat = None
        rep = SphericalInvariants(**data['representation'])

    data['calculator'] = rep.hypers
    tojson(job.fn(group['fn_in']), data)

    command = [group['executable'][data['nl_type']],
               job.fn(group['fn_in']),
               job.fn(group['fn_out'])]

    # look at memory footprint
    p = Popen(command, stdout=PIPE, stderr=PIPE)
    max_mem = memory_usage(p, interval=0.1, max_usage=True)

    # look at timings
    p = Popen(command, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes and waits for the process to exit.
    # Reading stderr twice (as done before) always printed an empty message,
    # because the first read had already consumed the stream.
    _, stderr_output = p.communicate()
    if stderr_output:
        print(stderr_output)

    data = fromjson(job.fn(group['fn_out']))
    data = data['results']
    data['n_features'] = n_feat
    data['mem_max'] = max_mem
    data['mem_unit'] = 'MiB'
    tojson(job.fn(group['fn_res']), data)
コード例 #6
0
def get_feature_vector(hypers, frames):
    """Return the feature matrix of ``frames`` for the given SOAP ``hypers``.

    The computation runs inside ``ostream_redirect()``; a size estimate based
    on the number of coefficients (8 bytes each) is printed along the way.
    """
    with ostream_redirect():
        calculator = SphericalInvariants(**hypers)
        managers = calculator.transform(frames)
        size_in_mb = calculator.get_num_coefficients() * 8.0 / 1.0e6
        print('Feature vector size: %.3fMB' % (size_in_mb,))
        return managers.get_feature_matrix()
コード例 #7
0
def compute_feature_selection(job):
    """Select a feature subset for the job's structures and store it as JSON.

    The frames ``[start_structure, start_structure + n_structures)`` of the
    job's file are transformed with the statepoint's representation, a
    ``RandomFilter`` built from ``'feature_subselection'`` selects and filters
    them, and the result is written to the job's ``feature_fn`` file.
    """
    statepoint = _decode(job.statepoint())
    first = job.sp.start_structure
    count = job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    calculator = SphericalInvariants(**statepoint['representation'])
    managers = calculator.transform(frames)

    selector = RandomFilter(calculator, **statepoint['feature_subselection'])
    selection = selector.select_and_filter(managers)
    # (disabled) when Nselect is None the subselection used to be reset:
    # if sp['feature_subselection']['Nselect'] is None:
    #     feature_subselection['coefficient_subselection'] = None
    tojson(job.fn(group['feature_fn']), selection)
コード例 #8
0
def benchmark_spherical_representations(frames, optimization_args,
                                        radial_basis):
    """Time ``SphericalExpansion`` and ``SphericalInvariants`` on ``frames``.

    Both representations use the module-level ``INTERACTION_CUTOFF`` and are
    run through ``transform_representation`` for
    ``NB_ITERATIONS_PER_REPRESENTATION`` iterations.
    """
    expansion_hypers = dict(
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        radial_basis=radial_basis,
        optimization_args=optimization_args,
    )
    print("Timing SphericalExpansion")
    expansion = SphericalExpansion(**expansion_hypers)
    transform_representation(
        expansion, frames, nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)

    invariants_hypers = dict(
        soap_type="PowerSpectrum",
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        normalize=False,
        radial_basis=radial_basis,
        optimization_args=optimization_args,
    )
    print("Timing SphericalInvariants")
    invariants = SphericalInvariants(**invariants_hypers)
    transform_representation(
        invariants, frames, nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)
コード例 #9
0
 def test_pickle(self):
     """Round-trip a cosine kernel through pickle and compare ``to_dict`` forms."""
     calculator = SphericalInvariants(**self.hypers)
     kernel_hypers = dict(name="Cosine", target_type="Structure", zeta=2)
     original = Kernel(calculator, **kernel_hypers)

     restored = pickle.loads(pickle.dumps(original))

     self.assertTrue(to_dict(original) == to_dict(restored))
コード例 #10
0
    def test_serialization(self):
        """Check that ``to_dict`` -> ``from_dict`` -> ``to_dict`` is stable."""
        original = SphericalInvariants(**self.hypers)
        serialized = to_dict(original)

        # rebuild from the dictionary and serialize the rebuilt object again
        reserialized = to_dict(from_dict(serialized))

        self.assertTrue(serialized == reserialized)
コード例 #11
0
    def test_serialization(self):
        """Check the ``to_dict``/``from_dict`` round trip of cosine kernels.

        Runs once per target type ("Atom" and "Structure").
        """
        calculator = SphericalInvariants(**self.hypers)

        for target_type in ("Atom", "Structure"):
            kernel = Kernel(
                calculator, name="Cosine", target_type=target_type, zeta=2)

            serialized = to_dict(kernel)
            # rebuild from the dictionary and serialize the copy again
            reserialized = to_dict(from_dict(serialized))

            self.assertTrue(serialized == reserialized)
コード例 #12
0
ファイル: test_filter.py プロジェクト: Luthaf/librascal
 def abstractSetUp(self):
     """Load the example molecules and compute their SOAP features.

     Sets ``self.example_frames``, ``self.repr``, ``self.managers`` and
     ``self.example_features`` for use by the tests.
     """
     frames = ase.io.read(
         "reference_data/inputs/small_molecules-20.json", ":")

     # every atomic number that occurs in any of the frames
     species = {
         int(number)
         for frame in frames
         for number in frame.get_atomic_numbers()
     }

     calculator = SOAP(
         max_radial=8,
         max_angular=6,
         interaction_cutoff=4.0,
         cutoff_smooth_width=1.0,
         gaussian_sigma_type="Constant",
         gaussian_sigma_constant=0.3,
         expansion_by_species_method="user defined",
         global_species=sorted(species),
     )

     self.managers = calculator.transform(frames)
     self.example_features = self.managers.get_features(calculator)
     self.repr = calculator
     self.example_frames = frames
コード例 #13
0
def compute_soap(frames, n_FPS=200, soap_hypers=None):
    """
        Computes the soap vectors and does FPS, if desired

        Parameters
        ----------
        frames : iterable
            Structures to featurize. Each frame is wrapped in place
            (``frame.wrap()``) before the features are computed.
        n_FPS : int or None
            Number of feature columns to keep through furthest point
            sampling; ``None`` returns all columns.
        soap_hypers : dict or None
            Hyperparameters overriding the defaults below. The dict is not
            modified.

        Returns
        -------
        The feature matrix, restricted to the selected columns when
        ``n_FPS`` is given.
    """
    soap_default = dict(soap_type="PowerSpectrum",
                        interaction_cutoff=3.5,
                        max_radial=6,
                        max_angular=6,
                        gaussian_sigma_type="Constant",
                        gaussian_sigma_constant=0.4,
                        cutoff_smooth_width=0.5)

    # Merge into a fresh dict. The previous `soap_hypers={}` default was
    # mutated in place, so defaults leaked into the caller's dict and the
    # shared default object carried state from one call to the next.
    hypers = dict(soap_default)
    if soap_hypers is not None:
        hypers.update(soap_hypers)

    # Compute SOAPs (from librascal tutorial)
    soap = SOAP(**hypers)

    for frame in frames:
        frame.wrap()

    soap_rep = soap.transform(frames)
    X_raw = soap_rep.get_features(soap)

    num_features = X_raw.shape[1]

    if n_FPS is None:
        return X_raw

    print(f"Each SOAP vector contains {num_features} components.\
               \nWe will use furthest point sampling to generate a subsample containing {n_FPS} components of our SOAP vectors.")

    # FPS the components (columns), hence the transpose
    col_idxs, _ = FPS(X_raw.T, n_FPS)
    return X_raw[:, col_idxs]
コード例 #14
0
class SymmetrizedAtomicDensityCorrelation:
    '''
    SOAP coefficients vector, as obtained from rascal

    ``target`` selects whether ``compute`` returns one row per atomic
    environment ('Atom') or one averaged row per structure ('Structure').
    '''
    def __init__(self, spherical_hypers, target):
        self.representation = SphericalInvariants(**spherical_hypers)
        assert target in ('Atom', 'Structure')
        self.target = target

    def get_metadata(self):
        """Return the class name, target and representation hypers as a dict."""
        metadata = {}
        metadata['class'] = class_name(self)
        metadata['target'] = self.target
        metadata['spherical_invariant_hypers'] = self.representation.hypers
        return metadata

    def compute(self, frames, center_atom_id_mask):
        """Compute the features of ``frames`` for the configured target.

        For 'Structure', the rows belonging to each structure are located
        from the lengths of the entries of ``center_atom_id_mask`` and
        averaged.
        """
        if self.target not in ('Atom', 'Structure'):
            return None

        atom_features = self.representation.transform(frames).get_features(self.representation)
        if self.target == 'Atom':
            return atom_features

        # row offsets of each structure inside the per-atom feature matrix
        centers_per_structure = [len(center_mask) for center_mask in center_atom_id_mask]
        offsets = np.hstack((0, np.cumsum(centers_per_structure)))

        # mean (not sum) of the atomic features of every structure
        structure_rows = []
        for i, _ in enumerate(frames):
            block = atom_features[offsets[i]:offsets[i + 1]]
            structure_rows.append(np.mean(block, axis=0))
        return np.vstack(structure_rows)
コード例 #15
0
def get_randomly_sparsified_soap(data, rep):
    """Build a ``SphericalInvariants`` restricted to a random feature subset.

    ``data['representation']['coefficient_subselection']`` is read as the
    fraction of features to keep. The feature indices of ``rep`` are shuffled
    with numpy's global RNG seeded by ``data['seed']`` and the leading
    fraction is selected.

    Returns the sparsified representation and the number of kept features.
    """
    hypers = deepcopy(data['representation'])
    fraction = hypers['coefficient_subselection']
    mapping = get_feature_index_mapping(rep, hypers['global_species'])
    n_total = len(mapping)

    shuffled_ids = np.arange(n_total)
    np.random.seed(data['seed'])
    np.random.shuffle(shuffled_ids)

    n_feat = int(fraction * n_total)
    selection = {key: [] for key in ('a', 'b', 'n1', 'n2', 'l')}
    for feature_id in shuffled_ids[:n_feat]:
        for key, value in mapping[feature_id].items():
            selection[key].append(int(value))

    hypers['coefficient_subselection'] = selection
    return SphericalInvariants(**hypers), n_feat
コード例 #16
0
def compute_knm(job):
    """Assemble and save the KNM matrix for the structures of ``job``.

    One row per structure comes first; when ``job.sp.train_with_grad`` is
    set, three rows per atom follow, grouped by structure. Rows are filled
    from the ``compute`` helper; the kernel object is dumped to the job's
    ``kernel_fn`` file and the matrix is saved to its ``knm_fn`` file.
    """
    sp = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    X_pseudo = load_obj(job.fn(group['sparse_point_fn']))

    with_grad = job.sp.train_with_grad
    hypers = X_pseudo.representation._get_init_params()
    hypers['compute_gradients'] = with_grad

    soap = SphericalInvariants(**hypers)
    kernel = Kernel(soap, **sp['kernel'])

    n_structures = len(frames)
    # three gradient rows per atom of each frame
    grad_rows_per_frame = [3 * len(frame) for frame in frames]
    n_grads = sum(grad_rows_per_frame)
    # gradient blocks start after the n_structures structure rows
    grad_offsets = np.cumsum([0] + grad_rows_per_frame) + n_structures
    dump_obj(job.fn(group['kernel_fn']), kernel)

    n_rows = n_structures + n_grads if with_grad else n_structures
    KNM = np.zeros((n_rows, X_pseudo.size()))

    for i_frame, frame in enumerate(frames):
        en_row, grad_rows = compute(
            i_frame, frame, soap, kernel, X_pseudo, grad=with_grad)
        KNM[i_frame] = en_row
        if with_grad:
            start, stop = grad_offsets[i_frame], grad_offsets[i_frame + 1]
            KNM[start:stop] = grad_rows

    np.save(job.fn(group['knm_fn']), KNM)
コード例 #17
0
def compute_sparse_point(job):
    """Select sparse points for the job's structures and dump them.

    The selection runs on the statepoint's representation. The filtering is
    then repeated with a representation whose ``coefficient_subselection``
    is taken from the job's ``feature_fn`` file, and the filtered result is
    dumped to the job's ``sparse_point_fn`` file.
    """
    statepoint = _decode(job.statepoint())
    first = job.sp.start_structure
    count = job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    # pass 1: choose the sparse points with the statepoint's representation
    calculator = SphericalInvariants(**statepoint['representation'])
    managers = calculator.transform(frames)
    selector = RandomFilter(
        calculator, **statepoint['sparse_point_subselection'])
    selector.select(managers)

    # pass 2: recompute with the stored feature subselection and filter
    stored_selection = fromjson(job.fn(group['feature_fn']))
    statepoint['representation']['coefficient_subselection'] = (
        stored_selection['coefficient_subselection'])
    calculator = SphericalInvariants(**statepoint['representation'])
    managers = calculator.transform(frames)
    # swap the representation held by the selector for the new one
    selector._representation = calculator
    sparse_points = selector.filter(managers)
    dump_obj(job.fn(group['sparse_point_fn']), sparse_points)
コード例 #18
0
def benchmark_spherical_representations(frames, optimization_args,
                                        radial_basis):
    """Time ``SphericalExpansion`` and ``SphericalInvariants`` on ``frames``.

    ``optimization_args`` is passed under the ``"optimization"`` key. Both
    representations are run through ``transform_representation`` for
    ``NB_ITERATIONS_PER_REPRESENTATION`` iterations.
    """
    expansion_hypers = dict(
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        radial_basis=radial_basis,
        optimization=optimization_args,
    )
    print("Timing SphericalExpansion")
    expansion = SphericalExpansion(**expansion_hypers)
    transform_representation(
        expansion, frames, nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)

    invariants_hypers = dict(
        soap_type="PowerSpectrum",
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        normalize=False,
        radial_basis=radial_basis,
        optimization=optimization_args,
    )
    print("Timing SphericalInvariants")
    invariants = SphericalInvariants(**invariants_hypers)
    transform_representation(
        invariants, frames, nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)
コード例 #19
0
ファイル: python_utils_test.py プロジェクト: Luthaf/librascal
    def test_hypers_construction(self):
        """Checks that manually-constructed and automatic
        framework are consistent."""
        # --- manual route: covariance -> PCA -> projection matrices ---
        hypers = deepcopy(self.hypers)
        hypers["max_radial"] = self.expanded_max_radial

        expansion = SphericalExpansion(**hypers)
        expansion_feats = expansion.transform(
            self.frames).get_features_by_species(expansion)

        covariance = get_radial_basis_covariance(expansion, expansion_feats)
        _, pca_vectors = get_radial_basis_pca(covariance)
        projections = get_radial_basis_projections(
            pca_vectors, self.hypers["max_radial"])

        # now makes this SOAP
        hypers.update(
            max_radial=self.hypers["max_radial"],
            soap_type="PowerSpectrum",
            optimization={
                "RadialDimReduction": {"projection_matrices": projections},
                "Spline": {"accuracy": 1e-8},
            },
        )

        # compute SOAP
        soap_manual = SphericalInvariants(**hypers)
        feats_manual = soap_manual.transform(
            self.frames).get_features(soap_manual)

        # --- compact route: a single utility call builds the hypers ---
        compact_hypers = deepcopy(self.hypers)
        compact_hypers["soap_type"] = "PowerSpectrum"
        compact_hypers = get_optimal_radial_basis_hypers(
            compact_hypers,
            self.frames,
            expanded_max_radial=self.expanded_max_radial)
        soap_compact = SphericalInvariants(**compact_hypers)
        feats_compact = soap_compact.transform(
            self.frames).get_features(soap_compact)

        self.assertTrue(np.allclose(feats_manual, feats_compact))
コード例 #20
0
    mask_center_atoms_by_species(molecule, species_select=[
        'C',
    ])
    # Also works by atomic number
    #mask_center_atoms_by_species(molecule, species_select=[6,])

# Hyperparameters of the representation used for the masking demonstration.
hypers = dict(
    interaction_cutoff=5.0,
    cutoff_smooth_width=0.5,
    max_radial=8,
    max_angular=6,
    gaussian_sigma_type="Constant",
    gaussian_sigma_constant=0.3,
)

# Features with the centre mask as it stands at this point of the script.
representation = SphericalInvariants(**hypers)
atoms_transformed = representation.transform(molecules)
print(f"Number of feature vectors computed: "
      f"{atoms_transformed.get_features(representation).shape[0]:d}")

print("Now masking out the first 5 atoms of each molecule.")
# Count the atoms with atomic number 6 beyond the first five of each molecule.
n_remaining_centers = sum(
    np.sum(mol.get_atomic_numbers()[5:] == 6) for mol in molecules)
print(f"Number of centres remaining: {n_remaining_centers:d}")

# Blacklist atom ids 0-4 in every molecule, then recompute the features.
for molecule in molecules:
    mask_center_atoms_by_id(molecule, id_blacklist=np.arange(5))
atoms_transformed = representation.transform(molecules)
print(f"Number of feature vectors computed: "
      f"{atoms_transformed.get_features(representation).shape[0]:d}")
コード例 #21
0
def dump_reference_json():
    """Write the reference inputs for the feature sparsification tests.

    For every combination of input file, SOAP type and selection mode a
    dense and a sparsified ``SphericalInvariants`` are built, and their
    hypers plus the adaptor parameters of the sparsified managers are
    collected. The list is written as pretty-printed JSON to
    ``sparsification_inputs.json`` under ``root``/``dump_path``.

    Relies on names defined outside this function (``root``, ``inputs_path``,
    ``read_inputs_path``, ``dump_path``, ``product``, ``prettyjson``, ...).
    """
    # make the build and tests directories importable before importing rascal
    sys.path.insert(0, join(root, "build/"))
    sys.path.insert(0, join(root, "tests/"))
    from rascal.representations import SphericalInvariants
    from ase.io import read

    # fixed seed: the shuffles below are drawn from numpy's global RNG, so
    # the selections depend on the iteration order of the loop
    np.random.seed(10)
    fns = [
        "diamond_2atom_distorted.json",
        "CaCrP2O7_mvc-11955_symmetrized.json",
        "methane.json",
    ]
    soap_types = ["PowerSpectrum"]
    # selection modes: every feature in order, every feature shuffled,
    # or the first 8 features after shuffling
    Nselects = ["all", "all_random", "8_random"]

    sparsification_inputs = []
    for fn, soap_type, Nselect in product(fns, soap_types, Nselects):
        frames = read(join(inputs_path, fn), ":")

        hypers = dict(
            soap_type=soap_type,
            interaction_cutoff=3.5,
            max_radial=2,
            max_angular=2,
            gaussian_sigma_constant=0.4,
            gaussian_sigma_type="Constant",
            cutoff_smooth_width=0.5,
            normalize=False,
            compute_gradients=True,
            expansion_by_species_method="structure wise",
        )

        # dense representation; `hyp` is the copy that receives the selection
        soap = SphericalInvariants(**hypers)
        managers = soap.transform(frames)
        hyp = deepcopy(hypers)

        # select some features from the possible set
        mapping = soap.get_feature_index_mapping(managers)
        # one empty list per key found in the first mapping entry
        selected_features = {key: [] for key in mapping[0].keys()}
        ids = np.array([key for key in mapping.keys()])
        if Nselect == "all":
            pass
        elif Nselect == "all_random":
            np.random.shuffle(ids)
        elif Nselect == "8_random":
            np.random.shuffle(ids)
            ids = ids[:8]
        else:
            raise NotImplementedError()
        # collect the entries of each selected feature, key by key
        for idx in ids:
            coef_idx = mapping[idx]
            for key in selected_features.keys():
                selected_features[key].append(int(coef_idx[key]))
        # selected_features_global_ids is important for the tests
        selected_features["selected_features_global_ids"] = ids.tolist()
        mapp = dict(coefficient_subselection=selected_features)

        hyp.update(mapp)

        # sparsified representation built from the selected coefficients
        soap_s = SphericalInvariants(**hyp)
        managers_s = soap_s.transform(frames)

        sparsification_inputs.append(
            dict(
                hypers=dict(
                    rep=soap.hypers,
                    rep_sparse=soap_s.hypers,
                    adaptors=json.loads(managers_s.managers.get_parameters()),
                ),
                # stored path is built from read_inputs_path, not inputs_path
                filename=join(read_inputs_path, fn),
                Nselect=Nselect,
            ))

    fn_out = join(root, dump_path, "sparsification_inputs.json")
    print(fn_out)
    with open(fn_out, "w") as f:
        sparsification_inputs_pretty = prettyjson(sparsification_inputs,
                                                  indent=2,
                                                  maxlinelength=80)
        f.write(sparsification_inputs_pretty)
コード例 #22
0
def dump_reference_json():
    """Write the reference kernel matrices to ``kernel_reference.ubjson``.

    For every combination of cutoff, kernel name, target type, kernel
    argument set, SOAP type, gaussian sigma and radial/angular expansion
    size, the kernel matrix of the selected frames is computed and stored
    together with the representation, neighbour-list and kernel hypers.

    Relies on names defined outside this function (``root``, ``inputs_path``,
    ``dump_path``, ``read``, ``SphericalInvariants``, ``Kernel``, ``sys``).
    """
    import ubjson
    import os
    from copy import copy
    # make the build and tests directories importable
    sys.path.insert(0, os.path.join(root, 'build/'))
    sys.path.insert(0, os.path.join(root, 'tests/'))

    # parameter grid (one value each except soap_types)
    cutoffs = [3.5]
    gaussian_sigmas = [0.5]
    max_radials = [6]
    max_angulars = [6]
    soap_types = ["RadialSpectrum", "PowerSpectrum"]

    # fn is the path read here; fn_to_write is the path stored in the file
    fn = os.path.join(inputs_path, "small_molecules-20.json")
    fn_to_write = os.path.join(
        'reference_data', "inputs", "small_molecules-20.json")
    start = 0
    length = 5
    representations = ['spherical_invariants']
    kernel_names = ['Cosine']
    target_types = ['Structure', 'Atom']
    # extra kernel keyword arguments to scan, keyed by kernel name
    dependant_args = dict(Cosine=[dict(zeta=1), dict(zeta=2), dict(zeta=4)])

    # header of the reference file; note that max_angulars is not stored
    data = dict(filename=fn_to_write,
                start=start,
                length=length,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                kernel_names=kernel_names,
                target_types=target_types,
                dependant_args=dependant_args,
                rep_info=dict(spherical_invariants=[]))

    frames = read(fn, '{}:{}'.format(start, start + length))
    for representation_name in representations:
        for cutoff in cutoffs:
            print(fn, cutoff)
            # one result list per (representation, cutoff) pair
            data['rep_info'][representation_name].append([])
            for kernel_name in kernel_names:
                for target_type in target_types:
                    for kwargs in dependant_args[kernel_name]:
                        for soap_type in soap_types:
                            for gaussian_sigma in gaussian_sigmas:
                                for max_radial in max_radials:
                                    for max_angular in max_angulars:
                                        # the radial spectrum is always run
                                        # with max_angular = 0
                                        if 'RadialSpectrum' == soap_type:
                                            max_angular = 0

                                        hypers = {"interaction_cutoff": cutoff,
                                                  "cutoff_smooth_width": 0.5,
                                                  "max_radial": max_radial,
                                                  "max_angular": max_angular,
                                                  "gaussian_sigma_type": "Constant",
                                                  "gaussian_sigma_constant": gaussian_sigma,
                                                  "soap_type": soap_type,
                                                  "cutoff_function_type": "ShiftedCosine",
                                                  "normalize": True,
                                                  "radial_basis": "GTO"}
                                        soap = SphericalInvariants(**hypers)
                                        soap_vectors = soap.transform(frames)
                                        hypers_kernel = dict(name=kernel_name,
                                                             target_type=target_type)
                                        hypers_kernel.update(**kwargs)
                                        kernel = Kernel(soap, **hypers_kernel)
                                        kk = kernel(soap_vectors)
                                        # x = get_spectrum(hypers, frames)
                                        # duplicate each option's 'args' entry
                                        # under 'initialization_arguments'
                                        # (presumably the key the reader of
                                        # the reference file expects - TODO
                                        # confirm)
                                        for aa in soap.nl_options:
                                            aa['initialization_arguments'] = aa['args']

                                        data['rep_info'][representation_name][-1].append(dict(kernel_matrix=kk.tolist(),
                                                                                              hypers_rep=copy(
                                                                                                  soap.hypers),
                                                                                              hypers_manager=copy(
                                                                                                  soap.nl_options),
                                                                                              hypers_kernel=copy(hypers_kernel)))

    # binary mode: ubjson.dump writes bytes
    with open(os.path.join(root, dump_path,
                           "kernel_reference.ubjson"), 'wb') as f:
        ubjson.dump(data, f)
コード例 #23
0
def dump_reference_json():
    """Write the reference feature matrices of ``SphericalInvariants``.

    For each input file and cutoff, the feature matrix is computed for every
    combination of SOAP type, gaussian sigma and radial/angular expansion
    size, and stored with the hypers reported by the representation in
    ``spherical_invariants_reference.ubjson``.

    Relies on names defined outside this function (``sys``, ``json2ase``,
    ``load_json``, ``SphericalInvariants``). All paths are relative to
    ``'../'``.
    """
    import ubjson
    import os
    from copy import copy
    path = '../'
    # make the build and tests directories importable
    sys.path.insert(0, os.path.join(path, 'build/'))
    sys.path.insert(0, os.path.join(path, 'tests/'))

    # parameter grid
    cutoffs = [2, 3]
    gaussian_sigmas = [0.2, 0.5]
    max_radials = [4, 10]
    max_angulars = [3, 6]
    soap_types = ["RadialSpectrum", "PowerSpectrum", "BiSpectrum"]
    # NOTE(review): set to True by the first BiSpectrum iteration below and
    # never reset, so every later (file, cutoff) pass also runs
    # RadialSpectrum and PowerSpectrum with inversion_symmetry=True.
    # Confirm whether this is intended before regenerating reference data.
    inversion_symmetry = False

    # fns are the paths read here; fns_to_write are the paths stored in the
    # reference file
    fns = [
        os.path.join(
            path, "tests/reference_data/CaCrP2O7_mvc-11955_symmetrized.json"),
        os.path.join(path, "tests/reference_data/small_molecule.json")
    ]
    fns_to_write = [
        "reference_data/CaCrP2O7_mvc-11955_symmetrized.json",
        "reference_data/small_molecule.json",
    ]

    # header of the reference file; note that max_angulars is not stored
    data = dict(filenames=fns_to_write,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                rep_info=[])

    for fn in fns:
        frames = [json2ase(load_json(fn))]
        for cutoff in cutoffs:
            print(fn, cutoff)
            # one result list per (file, cutoff) pair
            data['rep_info'].append([])
            for soap_type in soap_types:
                for gaussian_sigma in gaussian_sigmas:
                    for max_radial in max_radials:
                        for max_angular in max_angulars:
                            # the radial spectrum is always run with
                            # max_angular = 0
                            if 'RadialSpectrum' == soap_type:
                                max_angular = 0
                            # the bispectrum ignores the grid values and is
                            # always run with max_radial=2, max_angular=1
                            if "BiSpectrum" == soap_type:
                                max_radial = 2
                                max_angular = 1
                                inversion_symmetry = True

                            hypers = {
                                "interaction_cutoff": cutoff,
                                "cutoff_smooth_width": 0.5,
                                "max_radial": max_radial,
                                "max_angular": max_angular,
                                "gaussian_sigma_type": "Constant",
                                "normalize": True,
                                "cutoff_function_type": "Cosine",
                                "radial_basis": "GTO",
                                "gaussian_sigma_constant": gaussian_sigma,
                                "soap_type": soap_type,
                                "inversion_symmetry": inversion_symmetry,
                            }

                            soap = SphericalInvariants(**hypers)
                            soap_vectors = soap.transform(frames)
                            x = soap_vectors.get_feature_matrix()
                            # x = get_feature_vector(hypers, frames)
                            data['rep_info'][-1].append(
                                dict(feature_matrix=x.tolist(),
                                     hypers=copy(soap.hypers)))

    # binary mode: ubjson.dump writes bytes
    with open(
            path +
            "tests/reference_data/spherical_invariants_reference.ubjson",
            'wb') as f:
        ubjson.dump(data, f)
コード例 #24
0
ファイル: test_filter.py プロジェクト: Luthaf/librascal
class IndexConversionTest(unittest.TestCase):
    """Test index conversion utilities for filtering.

    The fixture reads the first five frames of the small-molecules
    reference file, computes their features with a ``RadialSpectrum``
    representation and exercises the private index helpers of the
    ``filter`` module on the result.
    """
    def setUp(self):
        """Load the example frames and compute their feature matrix."""
        self.example_frames = ase.io.read(
            "reference_data/inputs/small_molecules-20.json", ":5")
        # Collect every atomic number that occurs in the example frames.
        global_species = set()
        for frame in self.example_frames:
            global_species.update(
                int(sp) for sp in frame.get_atomic_numbers())
        self.global_species = global_species
        self.repr = SOAP(
            max_radial=3,
            max_angular=0,
            soap_type="RadialSpectrum",
            interaction_cutoff=4.0,
            cutoff_smooth_width=1.0,
            gaussian_sigma_type="Constant",
            gaussian_sigma_constant=0.3,
            expansion_by_species_method="user defined",
            global_species=sorted(global_species),
        )
        self.managers = self.repr.transform(self.example_frames)
        self.example_features = self.managers.get_features(self.repr)

    def test_split_by_sp(self):
        """Test the feature matrix species splitter"""
        # Let's try a basic example matrix first: a single column holding
        # the atomic number of every atom, so that each per-species block
        # must contain nothing but that species' number.
        X = np.concatenate([
            frame.get_atomic_numbers() for frame in self.example_frames
        ])[:, np.newaxis]
        sps = list(self.global_species)
        X_by_sp = filter._split_feature_matrix_by_species(
            self.managers, X, sps)
        for sp in sps:
            self.assertTrue(np.all(X_by_sp[sp] == sp))
        # Now with the actual feature matrix, compared against a manual
        # boolean-mask selection of the rows belonging to each species.
        X_by_sp = filter._split_feature_matrix_by_species(
            self.managers, self.example_features, sps)
        atoms_species = X.flatten()
        for sp in sps:
            X_sp_manual = self.example_features[atoms_species == sp]
            self.assertTrue(np.all(X_sp_manual == X_by_sp[sp]))

    def test_indices_global(self):
        """Test the global-to-perstructure index transformation"""
        example_idces_global = [0, 2, 16, 17, 82]
        expected_perstructure = [[0, 2, 16], [0], [], [], [19]]
        example_idces_perstructure = filter._indices_manager_to_perstructure(
            self.managers, example_idces_global)
        self.assertEqual(example_idces_perstructure, expected_perstructure)
        # It should also work with an ASE list of atoms
        example_idces_perstructure = filter._indices_manager_to_perstructure(
            self.example_frames, example_idces_global)
        self.assertEqual(example_idces_perstructure, expected_perstructure)

    def test_indices_global_out_of_range(self):
        """Test the index transformer with out-of-range indices"""
        with self.assertRaisesRegex(
                ValueError, r"Selected index\(es\): \[83\] out of range"):
            filter._indices_manager_to_perstructure(
                self.managers,
                [
                    83,
                ],
            )
        bad_indices = list(range(83, 90))
        # Used verbatim inside the regex below, so each "." matches any
        # character (including the literal dots of an abbreviated list).
        bad_indices_str = "83, 84, ..., 88, 89"
        with self.assertRaisesRegex(
                ValueError,
                rf"Selected index\(es\): \[{bad_indices_str}\] out of range"):
            filter._indices_manager_to_perstructure(self.managers, bad_indices)

    def test_indices_perspecies(self):
        """Test the per-species, global-to-perstructure index transformation"""
        example_idces_perspecies = {
            1: [0, 1, 20],
            8: [1, 2, 5],
            6: [],
            7: [0, 4]
        }
        expected_perstructure = [[8, 9, 4], [], [9, 7, 0, 8], [], [0]]
        # The order of the species should not matter;
        # the output is always sorted by species
        sps = [7, 6, 8, 1]
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies, sps)
        self.assertEqual(perspecies_idces, expected_perstructure)
        # Try it with an iterator (instead of a list) of species
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies,
            example_idces_perspecies.keys())
        self.assertEqual(perspecies_idces, expected_perstructure)
        # And a set
        perspecies_idces = filter._indices_perspecies_manager_to_perstructure(
            self.managers, example_idces_perspecies, set(sps))
        self.assertEqual(perspecies_idces, expected_perstructure)

    def test_indices_perspecies_out_of_range(self):
        """Test the per-species index transformer with out-of-range indices"""
        bad_idces_perspecies = {1: [], 8: [6], 7: [], 6: []}
        sps = [1, 6, 7, 8]
        with self.assertRaisesRegex(
                ValueError,
                r"Selected index\(es\): \[6\] for species 8 out of range"):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, bad_idces_perspecies, sps)
        # Species 12 is requested although it is not part of the fixture's
        # species; the error message is expected to say so explicitly.
        nonexistent_idces_perspecies = {1: [], 8: [], 7: [], 6: [], 12: [0]}
        sps = [1, 6, 7, 8, 12]
        with self.assertRaisesRegex(
                ValueError,
                r"Selected index\(es\): \[0\] for species 12 out of range "
                r"\(species does not appear to be present\)",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, nonexistent_idces_perspecies, sps)

    def test_indices_bad_species(self):
        """Test the global-to-perstructure selection with bad species lists"""
        # Missing species
        sps_bad = [1, 6, 8]
        sps_bad_re_str = r"\[1, 6, 8\]"
        with self.assertRaisesRegex(
                ValueError,
                f"^Atom of type 7 found but was not listed in sps: {sps_bad_re_str}$",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, {sp: []
                                for sp in sps_bad}, sps_bad)
        # Duplicated species
        sps_bad = [1, 6, 6, 7, 8]
        with self.assertRaisesRegex(
                ValueError,
                r"^List of species contains duplicated entries: \[1, 6, 6, 7, 8\]",
        ):
            filter._indices_perspecies_manager_to_perstructure(
                self.managers, {sp: [0]
                                for sp in sps_bad}, sps_bad)
コード例 #25
0
 def test_pickle(self):
     """Check that a representation survives a pickle round trip.

     The calculator built from ``self.hypers`` is serialized and
     restored, and the ``to_dict`` forms of both objects are compared.
     """
     rep = SphericalInvariants(**self.hypers)
     serialized = pickle.dumps(rep)
     rep_ = pickle.loads(serialized)
     # assertEqual (rather than assertTrue(a == b)) reports the differing
     # entries when the round trip does not preserve the state.
     self.assertEqual(to_dict(rep), to_dict(rep_))
コード例 #26
0
    def test_representation_gradient(self):
        """
        Check get_features and get_features_gradient against the sparse
        kernel: the linear sparse kernel matrices rebuilt by hand from the
        exported features (and feature gradients) must match the ones
        returned by the reference Kernel call.
        """
        hypers = deepcopy(self.hypers)
        hypers["compute_gradients"] = True
        rep = SphericalInvariants(**hypers)

        features = rep.transform(self.frames)

        # number of sparse points requested for each species
        n_sparses = {1: 1, 6: 1, 8: 1, 14: 1, 15: 1, 20: 1, 24: 1}

        compressor = FPSFilter(rep, n_sparses, act_on="sample per species")
        X_pseudo = compressor.select_and_filter(features)

        xs = X_pseudo.get_features()
        n_sparse, n_feat = xs.shape

        # One boolean mask per species flagging its rows in ``xs``; the
        # rows are laid out as consecutive blocks following the order of
        # ``n_sparses``.
        masks = {}
        offset = 0
        for species, count in n_sparses.items():
            species_mask = np.zeros(n_sparse, dtype=bool)
            species_mask[offset:offset + count] = 1
            masks[species] = species_mask
            offset = offset + count

        kernel = Kernel(rep,
                        name="GAP",
                        zeta=1,
                        target_type="Structure",
                        kernel_type="Sparse")

        ij = features.get_gradients_info()
        n_atoms = len(np.unique(ij[:, 1]))
        n_neigh = ij.shape[0]

        # --- kernel between structures and sparse points ---
        KNM_ref = kernel(features, X_pseudo, (False, False))
        X = features.get_features(rep).reshape((n_atoms, n_feat))
        KNM = np.zeros((len(self.frames), n_sparse))
        center_idx = 0
        for frame_idx, manager in enumerate(features):
            for center in manager:
                sp_mask = masks[center.atom_type]
                KNM[frame_idx, sp_mask] += np.dot(X[center_idx],
                                                  xs[sp_mask].T)
                center_idx += 1
        self.assertTrue(np.allclose(KNM_ref, KNM))

        # --- kernel between feature gradients and sparse points ---
        KNM_ref = kernel(features, X_pseudo, (True, False))

        X_der = features.get_features_gradient(rep).reshape(
            (n_neigh, 3, n_feat))

        KNM = np.zeros((n_atoms, 3, n_sparse))
        for row, (_, _, j, i_sp, _) in enumerate(ij):
            sp_mask = masks[i_sp]
            # accumulate the three Cartesian components one at a time
            for axis in range(3):
                KNM[j, axis, sp_mask] += np.dot(X_der[row, axis],
                                                xs[sp_mask].T)

        KNM = KNM.reshape((-1, n_sparse))

        self.assertTrue(np.allclose(KNM_ref, KNM))
コード例 #27
0
def compute_benchmark(job):
    """Time the stages of a model prediction and store the timings.

    For ``N_ITERATIONS`` repetitions the neighbour-list construction, the
    representation computation and the energy prediction are each run
    inside their own ``Timer``; when ``job.sp.grads_timing`` is set the
    representation is computed with gradients and the force prediction is
    timed as well. The dumped timers, tagged with the job name and the
    number of atoms, are written to the job's ``benchmark_fn`` file.
    """
    from rascal.models.krr import compute_sparse_kernel_gradients
    sp = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    model = load_obj(job.fn(group['model_fn']))
    soap = model.get_representation_calculator()
    grads_timing = job.sp.grads_timing

    # Rebuild the calculator so that gradients are only computed when the
    # force prediction is going to be timed.
    hypers = soap._get_init_params()
    hypers['compute_gradients'] = grads_timing
    soap = SphericalInvariants(**hypers)

    rc = sp['representation']['interaction_cutoff']
    nl_options = [
        dict(name='centers', args=[]),
        dict(name='neighbourlist', args=dict(cutoff=rc)),
        # dict(name='halflist', args=dict()),
        dict(name="centercontribution", args=dict()),
        dict(name='strict', args=dict(cutoff=rc))
    ]

    kernel = Kernel(soap, **sp['kernel'])

    N_ITERATIONS = sp['N_ITERATIONS']
    rep_tag = 'rep with grad' if grads_timing else 'rep'
    tags = ['NL', rep_tag, 'pred energy']
    if grads_timing:
        tags.append('pred forces')

    timers = {tag: Timer(tag=tag, logger=None) for tag in tags}
    if job.sp.name != 'qm9':
        # every dataset except qm9: replicate the first frame
        frames = [
            make_supercell(frames[0],
                           job.sp.n_replication * np.eye(3),
                           wrap=True,
                           tol=1e-11)
        ]
    else:
        frames = frames[:100]

    for _ in range(N_ITERATIONS):
        with timers['NL']:
            managers = AtomsList(frames, nl_options)
        sleep(0.1)
        with timers[rep_tag]:
            managers = soap.transform(managers)
        sleep(0.1)
        Y0 = model._get_property_baseline(managers)
        with timers['pred energy']:
            KNM = kernel(managers, model.X_train, (False, False))
            # the prediction itself is not kept, only its duration
            Y0 + np.dot(KNM, model.weights).reshape((-1))
        sleep(0.1)
        if grads_timing:
            with timers['pred forces']:
                rep = soap._representation
                forces = -compute_sparse_kernel_gradients(
                    rep, model.kernel._kernel, managers.managers,
                    model.X_train._sparse_points, model.weights.reshape(
                        (1, -1)))
            sleep(0.1)
        # drop the per-iteration objects before the next repetition
        del managers, KNM
        sleep(0.3)

    n_atoms = sum(len(frame) for frame in frames)

    timings = []
    for tag in tags:
        record = timers[tag].dumps()
        record.update({'name': job.sp.name, 'n_atoms': n_atoms})
        timings.append(record)

    tojson(job.fn(group['benchmark_fn']), timings)
コード例 #28
0
def compute_squared_radial_spectrum_wasserstein_distance(
        feature_paramaters, frames, nb_grid_points=200):
    """Compute pairwise Wasserstein-type distances between environments.

    A ``RadialSpectrum`` representation with a ``DVR`` radial basis of
    ``nb_grid_points`` functions is computed for ``frames``; its cumulative
    sum along the radial grid is inverted by linear interpolation on an
    interpolation grid with as many points as the requested ``max_radial``.

    Parameters
    ----------
    feature_paramaters : dict
        Must contain ``"feature_parameters"`` (keyword arguments of
        ``SphericalInvariants``, including ``soap_type``, ``radial_basis``,
        ``max_radial``, ``normalize`` and ``interaction_cutoff``) and
        ``"hilbert_space_parameters"["distance_parameters"]`` with the keys
        ``grid_type`` (``"gaussian_quadrature"`` or ``"equispaced"``) and
        ``delta_normalization``. The dictionary is not modified.
    frames : list
        Structures handed to ``SphericalInvariants.transform``.
    nb_grid_points : int
        Number of DVR basis functions used for the underlying density.

    Returns
    -------
    numpy.ndarray
        ``(nb_envs, nb_envs)`` matrix. With ``delta_normalization`` it holds
        the summed squared differences of the interpolated features;
        otherwise it is ``squareform(pdist(features))``.
        NOTE(review): the second form is built with pdist's default metric
        and is not squared explicitly -- confirm which one is intended.

    Raises
    ------
    ValueError
        If ``soap_type`` is not ``"RadialSpectrum"``, ``radial_basis`` is
        not ``"DVR"`` or ``grid_type`` is unknown.
    """
    # Work on a shallow copy: max_radial and normalize are overridden below
    # and the caller's dictionary must keep its original values, otherwise
    # a second call with the same dictionary would use other settings.
    soap_parameters = dict(feature_paramaters["feature_parameters"])
    if soap_parameters["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can be only computed for soap_type="RadialSpectrum".'
        )
    if soap_parameters["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can be only computed for radial_basis="DVR".'
        )

    nb_basis_functions = soap_parameters["max_radial"]
    soap_parameters["max_radial"] = nb_grid_points
    normalize_wasserstein_features = soap_parameters["normalize"]
    soap_parameters["normalize"] = False
    cutoff = soap_parameters["interaction_cutoff"]

    # compute soap representation for interpolation
    representation = SphericalInvariants(**soap_parameters)
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses gaussian quadrature points as basis function, we reproduce
    # the original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(
        nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    density_grid = np.hstack((0, density_grid))
    densities /= np.sqrt(density_weights)
    cdf = np.cumsum(densities, axis=1)

    distance_parameters = feature_paramaters["hilbert_space_parameters"][
        "distance_parameters"]
    grid_type = distance_parameters["grid_type"]

    # gaussian quadrature points as grid
    if grid_type == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(
            nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    elif grid_type == "equispaced":
        interp_grid = np.linspace(0, 1, nb_basis_functions)
    else:
        raise ValueError(
            f"The wasserstein grid_type={grid_type} is not known.")

    # normalize nonzero environments
    nonzero_mask = cdf[:, -1] != 0
    # insert the zero probability point at the beginning to help
    # interpolating at the beginning
    cdf = np.concatenate((np.zeros((cdf.shape[0], 1)), cdf), axis=1)

    dist = np.zeros((nb_envs, nb_envs))
    if distance_parameters["delta_normalization"]:
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points + 1)
        # potential bug when species are present
        for i in range(nb_envs):  # subset of nb_envs*nb_species
            for j in range(nb_envs):  # subset of nb_envs*nb_species
                for sp in range(nb_species):
                    # Both curves are lifted to the larger of the two
                    # totals at the last grid point before being scaled
                    # to end at 1.
                    max_norm = max(cdf[i, sp, -1], cdf[j, sp, -1])
                    cdf_i = np.copy(cdf[i, sp, :])
                    cdf_j = np.copy(cdf[j, sp, :])
                    cdf_i[-1] = max_norm
                    cdf_j[-1] = max_norm
                    cdf_i /= max_norm
                    cdf_j /= max_norm
                    interpolator_i = interp1d(cdf_i,
                                              density_grid,
                                              assume_sorted=True)
                    interpolator_j = interp1d(cdf_j,
                                              density_grid,
                                              assume_sorted=True)
                    wasserstein_features_i = interpolator_i(interp_grid)
                    wasserstein_features_j = interpolator_j(interp_grid)
                    dist[i, j] += np.sum(
                        (wasserstein_features_i - wasserstein_features_j)**2)
        return dist
    else:
        cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]
        wasserstein_features = np.zeros(
            (nb_envs * nb_species, nb_basis_functions))
        # rows whose total is zero keep all-zero features
        for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
            interpolator = interp1d(cdf[i, :],
                                    density_grid,
                                    assume_sorted=True)
            wasserstein_features[i, :] = interpolator(interp_grid)
        if grid_type == "gaussian_quadrature":
            wasserstein_features *= np.sqrt(interp_weights)

        wasserstein_features = wasserstein_features.reshape(
            nb_envs, nb_species * nb_basis_functions)

        if normalize_wasserstein_features:
            wasserstein_features /= np.linalg.norm(wasserstein_features,
                                                   axis=1)[:, np.newaxis]
        return squareform(pdist(wasserstein_features))
コード例 #29
0
def compute_radial_spectrum_wasserstein_features(feature_paramaters, frames):
    """Compute Wasserstein features from a DVR radial spectrum.

    The ``RadialSpectrum`` representation of ``frames`` is integrated along
    the radial grid with the trapezoidal rule, and the resulting cumulative
    curve of every (environment, species) row is inverted by linear
    interpolation on an interpolation grid of ``nb_basis_functions`` points.

    Parameters
    ----------
    feature_paramaters : dict
        Must contain ``"soap_parameters"`` (keyword arguments of
        ``SphericalInvariants``, including ``soap_type``, ``radial_basis``,
        ``max_radial``, ``normalize`` and ``interaction_cutoff``),
        ``"nb_basis_functions"``, ``"grid_type"``
        (``"gaussian_quadrature"`` or ``"equispaced"``) and
        ``"delta_normalization"``; when the latter is truthy,
        ``"delta_sigma"`` and ``"delta_offset_percentage"`` are read too.
        The dictionary is not modified.
    frames : list
        Structures handed to ``SphericalInvariants.transform``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nb_envs, nb_species * nb_basis_functions)``,
        divided by its row norms when ``soap_parameters["normalize"]`` is
        truthy.

    Raises
    ------
    ValueError
        If ``soap_type`` is not ``"RadialSpectrum"``, ``radial_basis`` is
        not ``"DVR"`` or ``grid_type`` is unknown.
    """
    # Work on a shallow copy: "normalize" is switched off for the density
    # computation and the caller's dictionary must keep its own value,
    # otherwise a second call would silently skip the normalization.
    soap_parameters = dict(feature_paramaters["soap_parameters"])
    if soap_parameters["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can be only computed for soap_type="RadialSpectrum".'
        )
    if soap_parameters["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can be only computed for radial_basis="DVR".'
        )

    nb_basis_functions = feature_paramaters["nb_basis_functions"]
    nb_grid_points = soap_parameters["max_radial"]
    normalize_wasserstein_features = soap_parameters["normalize"]
    soap_parameters["normalize"] = False
    cutoff = soap_parameters["interaction_cutoff"]

    # compute soap representation for interpolation
    representation = SphericalInvariants(**soap_parameters)
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses gaussian quadrature points as basis function, we reproduce
    # the original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(
        nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    densities /= np.sqrt(density_weights)

    # cumtrapz was renamed to cumulative_trapezoid and later removed from
    # SciPy; use whichever name the installed version provides.
    cumulative_trapezoid = getattr(scipy.integrate, "cumulative_trapezoid",
                                   None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = scipy.integrate.cumtrapz
    cdf = cumulative_trapezoid(densities, density_grid)
    # insert the zero probability point at the beginning to help
    # interpolating at the beginning
    cdf = np.hstack((np.zeros((cdf.shape[0], 1)), cdf))

    delta_normalization = feature_paramaters["delta_normalization"]
    if delta_normalization:
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points)
        delta_sigma = feature_paramaters["delta_sigma"]
        delta_offset_percentage = feature_paramaters["delta_offset_percentage"]
        if delta_sigma is None:
            # Lift the last grid point of every environment to the largest
            # total found for that species, plus the requested offset.
            for i in range(nb_species):
                max_norm = np.max(cdf[:, i, -1])
                max_norm += delta_offset_percentage * max_norm
                cdf[:, i, -1] += max_norm - cdf[:, i, -1]
        else:
            for i in range(nb_species):
                cdf[:, i, :] = bump_function(density_grid, cdf[:, i, :],
                                             cutoff, delta_sigma)
        cdf = cdf.reshape(nb_envs * nb_species, nb_grid_points)

    # normalize nonzero environments
    nonzero_mask = cdf[:, -1] != 0
    cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]

    # gaussian quadrature points as grid
    grid_type = feature_paramaters["grid_type"]
    if grid_type == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(
            nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    elif grid_type == "equispaced":
        interp_grid = np.linspace(0, 1, nb_basis_functions)
    else:
        raise ValueError("The wasserstein grid_type=" + grid_type +
                         " is not known.")

    wasserstein_features = np.zeros((nb_envs * nb_species, nb_basis_functions))
    # add jitter for uniqueness
    jitter = np.finfo(0.1).tiny * np.arange(cdf.shape[1])
    cdf += jitter[np.newaxis, :]
    # rows whose total is zero keep all-zero features
    for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
        interpolator = interp1d(cdf[i],
                                density_grid,
                                assume_sorted=True,
                                kind='linear')
        wasserstein_features[i, :] = interpolator(interp_grid)

    # delta normalization 2 sets delta areas to 0 so they cannot be used as
    # features
    if delta_normalization == 2:
        wasserstein_features[cutoff - 1e-3 <= wasserstein_features] = 0

    if grid_type == "gaussian_quadrature":
        wasserstein_features *= np.sqrt(interp_weights)

    wasserstein_features = wasserstein_features.reshape(
        nb_envs, nb_species * nb_basis_functions)

    if normalize_wasserstein_features:
        wasserstein_features /= np.linalg.norm(wasserstein_features,
                                               axis=1)[:, np.newaxis]

    return wasserstein_features
コード例 #30
0
 def __init__(self, spherical_hypers, target):
     """Build the representation calculator and record the target type.

     ``spherical_hypers`` is unpacked as keyword arguments of
     ``SphericalInvariants``; ``target`` must be ``'Atom'`` or
     ``'Structure'``.
     """
     self.representation = SphericalInvariants(**spherical_hypers)
     # NOTE(review): validation relies on ``assert``, which is skipped when
     # Python runs with -O; an invalid target would then go unnoticed here.
     assert target in ['Atom', 'Structure']
     self.target = target