Example #1
0
def kernel_attribute(dset1, dset2, kernel_options, kweights, xi):
    """Attribute a kernel model's output to the rows of every entry of dset1.

    For each entry of ``dset1`` the left-attributed kernel against ``dset2``
    is computed, divided by the square root of the entry's summed
    self-kernel, rescaled by its column sums raised to ``xi - 1`` and finally
    contracted with ``kweights``.

    Note that when ``kernel_options["topkernel_type"]`` is ``"average"``,
    every entry of ``dset2`` is summed and normalized IN PLACE, and that
    ``soap.silence()`` is called as a side effect.

    Args:
        dset1: indexable set of entries to attribute.
        dset2: indexable set of reference entries.
        kernel_options: options handed to ``get_cxx_kernel``; also read for
            the key ``"topkernel_type"``.
        kweights: weight vector contracted with the attribution matrix.
        xi: top-level kernel exponent.

    Returns:
        A list holding, for each entry of ``dset1``, the list of attributed
        contributions.
    """
    if kernel_options["topkernel_type"] == "average":
        # Collapse each reference entry in place: sum first, then normalize.
        for ref_idx in range(len(dset2)):
            reference = dset2[ref_idx]
            reference.sum()
            reference.normalize()
    kernel = get_cxx_kernel(kernel_options)
    soap.silence()
    contributions = []
    for idx in range(len(dset1)):
        log << log.back << "Attribute" << idx << log.flush
        entry = dset1[idx]
        k_attr = kernel.attributeLeft(entry, dset2, "float64")
        # Wrap the entry in a one-element set so its self-kernel can be
        # evaluated with the set-based interface.
        single = soap.DMapMatrixSet()
        single.append(entry)
        # Disabled sanity check: np.sum(k_attr, axis=0) is expected to equal
        # kernel.evaluate(single, dset2, False, "float64").
        k_self = kernel.evaluate(single, single, True, "float64")
        k_attr = k_attr / np.sum(k_self)**0.5
        # Account for the top-level exponent xi.
        k_attr = k_attr * np.sum(k_attr, axis=0)**(xi - 1)
        contributions.append(list(k_attr.dot(kweights)))
    log << log.endl
    return contributions
Example #2
0
 def __init__(self, dset, slices):
     """Group entries of ``dset`` into one DMapMatrixSet per slice.

     Args:
         dset: indexable collection of entries.
         slices: iterable of index collections; each one selects the
             entries of ``dset`` that form one ensemble.
     """
     self.dset = dset
     self.slices = slices
     self.dmap_ensembles = []
     for member_ids in slices:
         ensemble = soap.DMapMatrixSet()
         for member_id in member_ids:
             ensemble.append(dset[member_id])
         self.dmap_ensembles.append(ensemble)
Example #3
0
def evaluate_soap(configs, options):
    """Compute a power spectrum per configuration and collect the DMaps.

    Args:
        configs: iterable of configurations.
        options: options passed to ``soap.soapy.PowerSpectrum``.

    Returns:
        A ``soap.DMapMatrixSet`` with one exported DMap matrix per
        configuration, in input order.
    """
    collected = soap.DMapMatrixSet()
    for index, structure in enumerate(configs):
        label = "config-%d" % index
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=label)
        dmap = spectrum.exportDMapMatrix()
        message = "Config %d: %d centers" % (index, dmap.rows)
        log << message << log.endl
        collected.append(dmap)
    return collected
Example #4
0
def setup(configs, soap_options, verbose=True):
    """Build summed, normalized DMaps for all configs plus uniform weights.

    ``soap_options["spectrum.gradients"]`` is overwritten for every
    configuration: ``True`` for the first one, ``False`` for all others.

    Args:
        configs: iterable of configurations.
        soap_options: options passed to ``soap.soapy.PowerSpectrum``
            (modified in place, see above).
        verbose: when true, progress is written to ``log``.

    Returns:
        A 3-tuple ``(first, others, weights)`` where ``first`` is row 0 of the
        first DMap matrix, ``others`` is the list of row 0 of every remaining
        matrix, and ``weights`` is a uniform vector of length
        ``len(others)`` with entries ``1 / len(others)``.
    """
    dset = soap.DMapMatrixSet()
    for idx, config in enumerate(configs):
        if verbose:
            log << log.back << "SOAP for structure" << idx << log.flush
        # Gradients are only requested for the very first structure.
        soap_options["spectrum.gradients"] = (idx == 0)
        dset.append(
            soap.soapy.PowerSpectrum(config, soap_options).exportDMapMatrix())
    if verbose:
        log << log.endl
    n_total = len(dset)
    for pos in range(n_total):
        matrix = dset[pos]
        matrix.sum()
        matrix.normalize()
    first = dset[0][0]
    others = [dset[pos][0] for pos in range(1, n_total)]
    n_others = n_total - 1
    weights = np.ones(shape=(n_others,)) / n_others
    return first, others, weights
Example #5
0
def evaluate_soap(configs, options):
    """Compute power spectra with all hydrogen centers excluded.

    For every configuration ``options["exclude_center_ids"]`` is overwritten
    with the 1-based positions of the atoms whose symbol is ``"H"``.

    Args:
        configs: iterable of configurations exposing ``symbols``.
        options: options passed to ``soap.soapy.PowerSpectrum`` (modified in
            place, see above).

    Returns:
        A ``soap.DMapMatrixSet`` with one exported DMap matrix per
        configuration, in input order.
    """
    collected = soap.DMapMatrixSet()
    for index, structure in enumerate(configs):
        # Hydrogens are excluded by explicit id: per the original note,
        # setting "exclude_centers"=["H"] is not sufficient because discrete
        # atomic types are lost in the embedding.
        symbols = np.array(structure.symbols)
        hydrogen_rows = np.where(symbols == "H")[0]
        options["exclude_center_ids"] = [int(row + 1) for row in hydrogen_rows]
        label = "config-%d" % index
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=label)
        dmap = spectrum.exportDMapMatrix()
        message = "Config %d: %d centers" % (index, dmap.rows)
        log << message << log.endl
        collected.append(dmap)
    return collected
Example #6
0
def soap_evaluate(configs, options, output_file):
    """Compute power spectra honoring type exclusions and save the result.

    For every configuration, the positions of all atoms whose symbol appears
    in ``options["exclude_centers"]`` are gathered, sorted and stored 1-based
    in ``options["exclude_center_ids"]`` before the spectrum is computed.

    Args:
        configs: iterable of configurations exposing ``symbols``.
        options: options passed to ``soap.soapy.PowerSpectrum``; must provide
            ``"exclude_centers"`` and is modified in place (see above).
        output_file: destination handed to ``DMapMatrixSet.save``.

    Returns:
        The ``soap.DMapMatrixSet`` that was saved.
    """
    collected = soap.DMapMatrixSet()
    for index, structure in enumerate(configs):
        # Translate excluded types into sorted, 1-based center ids.
        symbols = np.array(structure.symbols)
        excluded_rows = sorted(
            row
            for excluded_type in options["exclude_centers"]
            for row in np.where(symbols == excluded_type)[0])
        options["exclude_center_ids"] = [row + 1 for row in excluded_rows]
        # Compute and export the descriptor.
        label = "config-%d" % index
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=label)
        dmap = spectrum.exportDMapMatrix()
        message = "Config %d: %d centers" % (index, dmap.rows)
        log << message << log.endl
        collected.append(dmap)
    collected.save(output_file)
    return collected
Example #7
0
def test_dmap_convolve():
    """Check that a convolved coherent DMap reproduces the exported kernel.

    For the structures read from 'structures.xyz' (the loop stops after
    index 10), the DMap exported by the power spectrum is compared with a
    DMap built via ``appendCoherent`` + ``convolve(9, 6)``: all four dot
    products between the two must agree according to ``assert_zero``.
    """
    log << log.mg << "<test_dmap_convolve>" << log.endl
    structures = soap.tools.io.read('structures.xyz')
    options = configure_default()
    convolved_set = soap.DMapMatrixSet()
    for idx, structure in enumerate(structures):
        spectrum = soap.soapy.PowerSpectrum(structure, options, "S%d" % idx)
        # The logger is toggled around the comparison and toggled back at
        # the end of the iteration.
        soap.toggle_logger()
        exported = spectrum.exportDMapMatrix()
        convolved = soap.DMapMatrix()
        convolved.appendCoherent(spectrum.spectrum)
        # NOTE(review): meaning of the arguments (9, 6) is not visible
        # here -- confirm against the DMapMatrix API.
        convolved.convolve(9, 6)
        convolved_set.append(convolved)
        k_reference = exported.dot(exported, "float64")
        k_conv_conv = convolved.dot(convolved, "float64")
        k_exp_conv = exported.dot(convolved, "float64")
        k_conv_exp = convolved.dot(exported, "float64")
        for k_other in (k_conv_conv, k_exp_conv, k_conv_exp):
            assert_zero(np.max(np.abs(k_reference - k_other)))
        soap.toggle_logger()
        if idx == 10:
            break
    log << log.endl
    log << log.mg << "All passed" << log.endl
Example #8
0
def parametrize_environment_specific(settings, rerun):
    """Build, or load from disk, every artefact of one environment model.

    The stages are processed in order: SOAP descriptors, kernel matrix,
    regression targets, kernel-ridge weights (plus training predictions) and
    the per-structure attributions stored in the "range" file. A stage is
    recomputed when ``rerun`` is true or when its output file is missing;
    otherwise its result is loaded from the file.

    Args:
        settings: dict providing ``"embedding_options"`` (with
            ``"channel_name"``), ``"paths"``, ``"soap_options_ref"``,
            ``"kernel_options"`` and ``"regression_options"`` (with
            ``"target_key"``, ``"lreg"`` and ``"xi"``). The entries of
            ``"paths"`` are interpreted relative to the module-level
            ``PATH``, for example::

                { "xyz_file": "data_esol/structures.xyz",
                  "soap_file": "data_esol/structures.soap",
                  "kmat_file": "data_esol/kernel.npy",
                  "targets_file": "data_esol/targets.npy",
                  "range_file": "data_esol/range.json",
                  "weights_file": "data_esol/weights.npy" }

            A ``"pred_file"`` entry is read as well.
        rerun: when true, every stage is recomputed regardless of the files
            already present.

    Returns:
        None. Results are persisted to the configured files.
    """
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl
    # PATHS: work on a copy so the caller's settings stay untouched, and
    # iterate over a snapshot of the items (valid on Python 2 and 3).
    paths = copy.deepcopy(settings["paths"])
    for p, v in list(paths.items()):
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])
    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])
    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])
    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])
    # MODEL
    regr_options = settings["regression_options"]
    # Weights and predictions are written together, so both files must be
    # present before the cached branch may be taken; otherwise loading the
    # predictions would fail on a missing file.
    model_cached = (os.path.isfile(paths["weights_file"])
                    and os.path.isfile(paths["pred_file"]))
    if rerun or not model_cached:
        from sklearn.kernel_ridge import KernelRidge
        log << "Make target: %s" % paths["weights_file"] << log.endl
        krr = KernelRidge(alpha=regr_options["lreg"], kernel='precomputed')
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])
    # ATTRIBUTION RANGE
    if rerun or not os.path.isfile(paths["range_file"]):
        # A fresh copy of the descriptors is loaded from disk to serve as
        # the set being attributed.
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights,
                                    regr_options["xi"])
        # Context manager guarantees the JSON file is flushed and closed.
        with open(paths["range_file"], "w") as range_out:
            json.dump(delta_Ys, range_out)
    else:
        with open(paths["range_file"]) as range_in:
            delta_Ys = json.load(range_in)
Example #9
0
def apply_environment_specific(configs, settings, options):
    """Attach basis-expanded attribution weights to every atom of ``configs``.

    A stored model (descriptors, kernel weights, predictions and attribution
    range) is loaded from the files listed in ``settings["paths"]``, resolved
    against the module-level ``PATH``. The precalculated descriptors
    ``PRECALC_SOAPS[soap_version]`` are attributed against it with
    ``kernel_attribute``. Each attributed value is expanded in a set of
    ``GaussianBasisFct`` functions whose centres and widths are expressed in
    units of the standard deviation of the stored attribution range, and the
    result is handed to ``config_add_weights``.

    Args:
        configs: sequence of configurations exposing ``symbols``; entry
            ``cidx`` is matched with attribution ``cidx``.
        settings: dict providing ``"embedding_options"`` (with
            ``"channel_name"``), ``"soap_options_ref"``, ``"kernel_options"``,
            ``"regression_options"`` (with ``"xi"``) and ``"paths"``.
        options: dict providing ``"weight"`` (normalization passed to
            ``norm_weights``; skipped when ``None``), ``"centres"`` and
            ``"sigmas"``.

    Returns:
        ``configs``, after ``config_add_weights`` was applied to each entry.
    """
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Apply" << channel_name << "to %d configs" % (
        len(configs)) << log.endl
    # LOAD MODEL
    log << "Loading model ..." << log.endl
    soap_version = settings["soap_options_ref"]
    soap_options = SETTINGS["soap_options"][soap_version]
    soap_excls = set(soap_options["exclude_centers"])
    kernel_options = settings["kernel_options"]
    regr_options = settings["regression_options"]
    paths = settings["paths"]
    dset2 = soap.DMapMatrixSet(os.path.join(PATH, paths["soap_file"]))
    kweights = np.load(os.path.join(PATH, paths["weights_file"]))
    # Only needed for the disabled consistency check further below.
    y_predict = np.load(os.path.join(PATH, paths["pred_file"]))
    # Context manager so the JSON file handle is closed deterministically.
    with open(os.path.join(PATH, paths["range_file"])) as range_in:
        yrange = np.concatenate(json.load(range_in))
    # ATTRIBUTE
    dset1 = soap.DMapMatrixSet(PRECALC_SOAPS[soap_version])
    delta_Ys = kernel_attribute(dset1, dset2, kernel_options, kweights,
                                regr_options["xi"])
    assert len(delta_Ys) == len(dset1)
    # ASSIGN BASIS WEIGHTS
    w_avg = np.average(yrange)
    w_std = np.std(yrange)
    z = options["weight"]
    log << "Channel normalization is" << z << log.endl
    # Basis centres/widths are given relative to the mean and spread of the
    # stored attribution range.
    basis = [
        GaussianBasisFct(w_avg + centre * w_std, sigma * w_std)
        for centre, sigma in zip(options["centres"], options["sigmas"])
    ]
    log << "- Using %d basis functions:" % (len(basis)) << log.endl
    for b in basis:
        log << "    Centre, width = %1.4f, %1.4f" % (b.centre,
                                                     b.sigma) << log.endl
    channel_names = ["%s%d" % (channel_name, i) for i in range(len(basis))]
    for cidx, config in enumerate(configs):
        log << log.back << "- Config %d" % cidx << log.flush
        types = config.symbols
        type_coords = []
        delta_Y = delta_Ys[cidx]
        # attr_idx walks the attributed values, which exist only for atoms
        # whose type is not excluded.
        attr_idx = 0
        sum_w = 0.0
        for t in types:
            if t in soap_excls:
                # Excluded centers carry no channel weights.
                type_coords.append({})
            else:
                w = delta_Y[attr_idx]
                sum_w += w
                wb = np.array([fct(w) for fct in basis])
                if z is not None:
                    wb = norm_weights(wb, z)
                type_coords.append(
                    {channel_names[i]: wb[i]
                     for i in range(len(basis))})
                attr_idx += 1
        # TO CHECK:
        # >>> log << sum_w << "==" << y_predict[cidx] << log.endl
        config_add_weights(config, type_coords)
    log << log.endl
    return configs
Example #10
0
    dset.save(output_file)
    return dset

def evaluate_kernel(dset, options, output_file):
    """Evaluate, normalize and save the kernel matrix of ``dset`` with itself.

    The raw kernel is scaled element-wise by ``1 / sqrt(K_ii * K_jj)`` so
    that its diagonal becomes one, then written to ``output_file`` with
    ``np.save`` and echoed to ``log``.

    Args:
        dset: descriptor set passed to the kernel on both sides.
        options: options used to construct ``soap.Kernel``.
        output_file: destination of the saved matrix.

    Returns:
        The normalized kernel matrix.
    """
    kernel = soap.Kernel(options)
    raw_kernel = kernel.evaluate(dset, dset, True, "float64")
    inv_sqrt_diag = 1./np.sqrt(raw_kernel.diagonal())
    normalized = raw_kernel*np.outer(inv_sqrt_diag, inv_sqrt_diag)
    np.save(output_file, normalized)
    log << log.mg << "Saved kernel =" << log.endl
    log << normalized << log.endl
    return normalized

if __name__ == "__main__":
    # Script entry point: read the structures, compute and store their SOAP
    # descriptors, then evaluate and store the molecular kernel.
    configs = soap.tools.io.read('data/structures.xyz')

    # SOAP descriptors, written to disk by evaluate_soap
    options = soap.soapy.configure_default()
    dset = evaluate_soap(configs, options, 'data/structures.soap')

    # Molecular kernel, evaluated on the descriptors reloaded from disk
    dset = soap.DMapMatrixSet("data/structures.soap")
    kernel_options = soap.Options()
    for option_key, option_value in (
            ("basekernel_type", "dot"),
            ("base_exponent", 3.),
            ("base_filter", False),
            ("topkernel_type", "average")):
        kernel_options.set(option_key, option_value)
    K = evaluate_kernel(dset, kernel_options, 'data/kernel.npy')