def kernel_attribute(dset1, dset2, kernel_options, kweights, xi):
    """Attribute kernel-model contributions for every entry of ``dset1``.

    For each entry of ``dset1`` the kernel's ``attributeLeft`` result against
    ``dset2`` is divided by the square root of the summed self-kernel of that
    entry, rescaled for the top-level exponent ``xi`` and contracted with
    ``kweights``.

    Args:
        dset1: Indexable collection of descriptor matrices to attribute.
        dset2: Indexable collection of reference descriptor matrices. When
            ``kernel_options["topkernel_type"] == "average"`` every entry
            has ``sum()`` and ``normalize()`` called on it in place.
        kernel_options: Options handed to ``get_cxx_kernel``; must provide
            the key ``"topkernel_type"``.
        kweights: Weights contracted with each attributed kernel block via
            ``.dot``.
        xi: Top-level kernel exponent.

    Returns:
        A list with one list per entry of ``dset1`` holding the values of
        the contracted attribution for that entry.
    """
    if kernel_options["topkernel_type"] == "average":
        # Reduce and normalise each reference entry in place.
        for ref_idx in range(len(dset2)):
            reference = dset2[ref_idx]
            reference.sum()
            reference.normalize()

    kernel = get_cxx_kernel(kernel_options)
    soap.silence()

    attributions = []
    for entry_idx in range(len(dset1)):
        log << log.back << "Attribute" << entry_idx << log.flush
        entry = dset1[entry_idx]
        k_attr = kernel.attributeLeft(entry, dset2, "float64")

        # Single-entry set, needed to evaluate the self-kernel of this entry.
        single = soap.DMapMatrixSet()
        single.append(entry)
        # Manual sanity check (not executed):
        # >>> K = kernel.evaluate(single, dset2, False, "float64")
        # >>> print np.sum(k_attr, axis=0), "==", K
        self_kernel = kernel.evaluate(single, single, True, "float64")
        k_attr = k_attr / np.sum(self_kernel)**0.5

        # Account for the top-level exponent xi.
        k_attr = k_attr * np.sum(k_attr, axis=0)**(xi - 1)

        attributions.append(list(k_attr.dot(kweights)))
    log << log.endl
    return attributions
def __init__(self, dset, slices):
    """Group entries of ``dset`` into one ``soap.DMapMatrixSet`` per slice.

    Args:
        dset: Indexable collection; ``dset[s]`` is looked up for every
            index ``s`` found in ``slices``.
        slices: Iterable of index iterables, one per ensemble.

    Stores ``dset`` and ``slices`` on the instance and fills
    ``self.dmap_ensembles`` with one set per slice, in slice order.
    """
    self.dset = dset
    self.dmap_ensembles = []
    self.slices = slices
    for member_ids in slices:
        ensemble = soap.DMapMatrixSet()
        for member_id in member_ids:
            ensemble.append(dset[member_id])
        self.dmap_ensembles.append(ensemble)
def evaluate_soap(configs, options):
    """Compute a power spectrum per configuration and collect the exports.

    Args:
        configs: Iterable of configurations passed to
            ``soap.soapy.PowerSpectrum``.
        options: Options object forwarded unchanged to every spectrum.

    Returns:
        A ``soap.DMapMatrixSet`` with one exported matrix per configuration,
        in input order.
    """
    collected = soap.DMapMatrixSet()
    for position, structure in enumerate(configs):
        tag = "config-%d" % position
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=tag)
        exported = spectrum.exportDMapMatrix()
        log << "Config %d: %d centers" % (position, exported.rows) << log.endl
        collected.append(exported)
    return collected
def setup(configs, soap_options, verbose=True):
    """Build summed, normalised descriptors for all configs plus uniform weights.

    Args:
        configs: Iterable of configurations; must contain at least one item.
        soap_options: Options passed to ``soap.soapy.PowerSpectrum``. The key
            ``"spectrum.gradients"`` is overwritten in place: ``True`` for the
            first configuration, ``False`` for all others.
        verbose: When true, progress is written to ``log``.

    Returns:
        A 3-tuple ``(first, others, weights)``: row 0 of the first reduced
        matrix, a list with row 0 of every remaining reduced matrix, and a
        numpy vector of ``len(configs) - 1`` equal entries that sum to one.
    """
    reduced = soap.DMapMatrixSet()
    for position, structure in enumerate(configs):
        if verbose:
            log << log.back << "SOAP for structure" << position << log.flush
        # Gradients are requested for the first structure only.
        soap_options["spectrum.gradients"] = (position == 0)
        spectrum = soap.soapy.PowerSpectrum(structure, soap_options)
        reduced.append(spectrum.exportDMapMatrix())
    if verbose:
        log << log.endl

    n_total = len(reduced)
    for position in range(n_total):
        reduced[position].sum()
        reduced[position].normalize()

    first = reduced[0][0]
    others = [reduced[position][0] for position in range(1, n_total)]
    weights = np.ones(shape=(n_total - 1,)) / (n_total - 1)
    return first, others, weights
def evaluate_soap(configs, options):
    """Compute power spectra for all configs, skipping hydrogen centers.

    Args:
        configs: Iterable of configurations exposing ``symbols``.
        options: Options forwarded to ``soap.soapy.PowerSpectrum``. The key
            ``"exclude_center_ids"`` is overwritten in place for every
            configuration.

    Returns:
        A ``soap.DMapMatrixSet`` with one exported matrix per configuration,
        in input order.
    """
    collected = soap.DMapMatrixSet()
    for position, structure in enumerate(configs):
        # Hydrogens are excluded by explicit id: per the original note,
        # setting "exclude_centers"=["H"] is not sufficient because discrete
        # atomic types are lost in the embedding.
        symbols = np.array(structure.symbols)
        hydrogen_rows = np.where(symbols == "H")[0]
        # The ids written to the options are the 0-based rows shifted by one.
        options["exclude_center_ids"] = [int(row + 1) for row in hydrogen_rows]

        tag = "config-%d" % position
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=tag)
        exported = spectrum.exportDMapMatrix()
        log << "Config %d: %d centers" % (position, exported.rows) << log.endl
        collected.append(exported)
    return collected
def soap_evaluate(configs, options, output_file):
    """Compute power spectra with type-based center exclusions and save them.

    Args:
        configs: Iterable of configurations exposing ``symbols``.
        options: Options forwarded to ``soap.soapy.PowerSpectrum``. The
            symbols listed under ``"exclude_centers"`` are read, and
            ``"exclude_center_ids"`` is overwritten in place for every
            configuration.
        output_file: Destination passed to the set's ``save`` method.

    Returns:
        The ``soap.DMapMatrixSet`` that was written to ``output_file``.
    """
    collected = soap.DMapMatrixSet()
    for position, structure in enumerate(configs):
        # Gather the rows of every center whose symbol is excluded.
        symbols = np.array(structure.symbols)
        excluded_rows = []
        for symbol in options["exclude_centers"]:
            excluded_rows.extend(np.where(symbols == symbol)[0])
        excluded_rows.sort()
        # The ids written to the options are the 0-based rows shifted by one.
        options["exclude_center_ids"] = [(row + 1) for row in excluded_rows]

        # Compute
        tag = "config-%d" % position
        spectrum = soap.soapy.PowerSpectrum(
            config=structure, options=options, label=tag)
        exported = spectrum.exportDMapMatrix()
        log << "Config %d: %d centers" % (position, exported.rows) << log.endl
        collected.append(exported)
    collected.save(output_file)
    return collected
def test_dmap_convolve():
    """Check that convolved and directly exported matrices give equal kernels.

    For up to the first eleven structures in ``structures.xyz`` the dot
    products between the exported matrix and the matrix built with
    ``appendCoherent`` + ``convolve(9, 6)`` are compared, in every
    combination, against the plain self-product.
    """
    log << log.mg << "<test_dmap_convolve>" << log.endl
    structures = soap.tools.io.read('structures.xyz')
    options = configure_default()
    convolved_set = soap.DMapMatrixSet()
    for step, structure in enumerate(structures):
        spectrum = soap.soapy.PowerSpectrum(structure, options, "S%d" % step)
        soap.toggle_logger()
        plain = spectrum.exportDMapMatrix()
        convolved = soap.DMapMatrix()
        convolved.appendCoherent(spectrum.spectrum)
        convolved.convolve(9, 6)
        convolved_set.append(convolved)

        reference = plain.dot(plain, "float64")
        candidates = (
            convolved.dot(convolved, "float64"),
            plain.dot(convolved, "float64"),
            convolved.dot(plain, "float64"),
        )
        for candidate in candidates:
            assert_zero(np.max(np.abs(reference - candidate)))
        soap.toggle_logger()
        if step == 10:
            break
    log << log.endl
    log << log.mg << "All passed" << log.endl
def parametrize_environment_specific(settings, rerun):
    """Build (or load from disk) all artefacts of one environment-specific model.

    Each stage writes its result to the path configured in
    ``settings["paths"]`` (resolved relative to the module-level ``PATH``).
    A stage is recomputed when ``rerun`` is true or its output file does not
    exist; otherwise the stored result is loaded.

    Stages, in order: SOAP descriptors, kernel matrix, regression targets,
    kernel-ridge weights and predictions, per-entry attributions.

    Args:
        settings: Mapping providing ``"embedding_options"`` (with
            ``"channel_name"``), ``"paths"``, ``"soap_options_ref"``,
            ``"kernel_options"`` and ``"regression_options"`` (with
            ``"target_key"``, ``"lreg"`` and ``"xi"``).
        rerun: When true, every stage is recomputed and its file rewritten.

    Returns:
        None. All results are persisted to the configured files.
    """
    channel_name = settings["embedding_options"]["channel_name"]
    log << log.mg << "Parametrizing" << channel_name << "model" << log.endl
    soap_types = SETTINGS["soap_types"]
    log << "Particle SOAP types are" << ", ".join(soap_types) << log.endl

    # PATHS - for example:
    # { "xyz_file": "data_esol/structures.xyz",
    #   "soap_file": "data_esol/structures.soap",
    #   "kmat_file": "data_esol/kernel.npy",
    #   "targets_file": "data_esol/targets.npy",
    #   "range_file": "data_esol/range.json",
    #   "weights_file": "data_esol/weights.npy" }
    # Work on a copy so the caller's settings keep their relative paths.
    paths = copy.deepcopy(settings["paths"])
    # items() on a snapshot works on both Python 2 and 3; only values of
    # existing keys are replaced.
    for p, v in list(paths.items()):
        paths[p] = os.path.join(PATH, v)
        log << "Path to %s = %s" % (p, paths[p]) << log.endl
    configs = soap.tools.io.read(paths["xyz_file"])

    # SOAP
    soap_options = SETTINGS["soap_options"][settings["soap_options_ref"]]
    if rerun or not os.path.isfile(paths["soap_file"]):
        log << "Make target: %s" % paths["soap_file"] << log.endl
        soap_configure_default(types=soap_types)
        dset = soap_evaluate(configs, soap_options, paths["soap_file"])
    else:
        log << "Load target: %s" % paths["soap_file"] << log.endl
        dset = soap.DMapMatrixSet(paths["soap_file"])

    # KERNEL
    kernel_options = settings["kernel_options"]
    if rerun or not os.path.isfile(paths["kmat_file"]):
        log << "Make target: %s" % paths["kmat_file"] << log.endl
        K = kernel_evaluate(dset, kernel_options, paths["kmat_file"])
    else:
        log << "Load target: %s" % paths["kmat_file"] << log.endl
        K = np.load(paths["kmat_file"])

    # TARGETS
    target_key = settings["regression_options"]["target_key"]
    if rerun or not os.path.isfile(paths["targets_file"]):
        log << "Make target: %s" % paths["targets_file"] << log.endl
        targets = np.array([float(c.info[target_key]) for c in configs])
        np.save(paths["targets_file"], targets)
    else:
        log << "Load target: %s" % paths["targets_file"] << log.endl
        targets = np.load(paths["targets_file"])

    # MODEL
    regr_options = settings["regression_options"]
    if rerun or not os.path.isfile(paths["weights_file"]):
        # Imported lazily: only needed when the model is actually fitted.
        from sklearn.kernel_ridge import KernelRidge
        log << "Make target: %s" % paths["weights_file"] << log.endl
        krr = KernelRidge(alpha=regr_options["lreg"], kernel='precomputed')
        # The top-level exponent xi is applied element-wise to the kernel.
        krr.fit(K**regr_options["xi"], targets)
        y_predict = krr.predict(K**regr_options["xi"])
        kweights = krr.dual_coef_
        np.save(paths["weights_file"], kweights)
        np.save(paths["pred_file"], y_predict)
    else:
        log << "Load target: %s" % paths["weights_file"] << log.endl
        kweights = np.load(paths["weights_file"])
        y_predict = np.load(paths["pred_file"])

    # ATTRIBUTION RANGE
    if rerun or not os.path.isfile(paths["range_file"]):
        # The set to attribute is read fresh from disk and kept separate
        # from `dset`, which serves as the reference set.
        dset_attr = soap.DMapMatrixSet(paths["soap_file"])
        delta_Ys = kernel_attribute(dset_attr, dset, kernel_options, kweights,
                                    regr_options["xi"])
        # Context manager guarantees the JSON is flushed and the handle closed.
        with open(paths["range_file"], "w") as range_out:
            json.dump(delta_Ys, range_out)
    else:
        with open(paths["range_file"]) as range_in:
            delta_Ys = json.load(range_in)
def apply_environment_specific(configs, settings, options):
    """Attach basis-expanded attribution weights of a stored model to configs.

    The stored model artefacts (descriptors, weights, attribution range) are
    loaded from the paths in ``settings["paths"]`` relative to the
    module-level ``PATH``. Attributions for the precalculated descriptors
    ``PRECALC_SOAPS[soap_version]`` are computed with ``kernel_attribute``,
    expanded on Gaussian basis functions and handed to
    ``config_add_weights`` for every configuration.

    Args:
        configs: Sequence of configurations exposing ``symbols``.
            ``configs[i]`` is paired with the i-th attribution, so the order
            is assumed to match the precalculated descriptors - TODO confirm
            against the caller.
        settings: Mapping providing ``"embedding_options"`` (with
            ``"channel_name"``), ``"soap_options_ref"``, ``"kernel_options"``,
            ``"regression_options"`` (with ``"xi"``) and ``"paths"``.
        options: Mapping providing ``"weight"`` (normalisation passed to
            ``norm_weights``, or ``None`` to skip it), ``"centres"`` and
            ``"sigmas"`` (basis positions and widths in units of the
            standard deviation of the stored attribution range).

    Returns:
        The ``configs`` object that was passed in, after
        ``config_add_weights`` has been called on each element.
    """
    log << log.mg << "Apply" << settings["embedding_options"][
        "channel_name"] << "to %d configs" % (len(configs)) << log.endl

    # LOAD MODEL
    log << "Loading model ..." << log.endl
    soap_version = settings["soap_options_ref"]
    soap_options = SETTINGS["soap_options"][soap_version]
    soap_excls = set(soap_options["exclude_centers"])
    kernel_options = settings["kernel_options"]
    regr_options = settings["regression_options"]
    paths = settings["paths"]
    dset2 = soap.DMapMatrixSet(os.path.join(PATH, paths["soap_file"]))
    # `targets` and `y_predict` are not used by the weighting below; they are
    # still loaded so that a missing model artefact raises here as before.
    targets = np.load(os.path.join(PATH, paths["targets_file"]))
    kweights = np.load(os.path.join(PATH, paths["weights_file"]))
    y_predict = np.load(os.path.join(PATH, paths["pred_file"]))
    # Context manager closes the handle instead of leaving it to the GC.
    with open(os.path.join(PATH, paths["range_file"])) as range_in:
        yrange = json.load(range_in)
    yrange = np.concatenate(yrange)

    # ATTRIBUTE
    dset1 = soap.DMapMatrixSet(PRECALC_SOAPS[soap_version])
    delta_Ys = kernel_attribute(dset1, dset2, kernel_options, kweights,
                                regr_options["xi"])
    assert len(delta_Ys) == len(dset1)

    # ASSIGN BASIS WEIGHTS
    w_avg = np.average(yrange)
    w_std = np.std(yrange)
    z = options["weight"]
    log << "Channel normalization is" << z << log.endl
    channel_name = settings["embedding_options"]["channel_name"]
    # Basis centres and widths are given relative to the mean and standard
    # deviation of the stored attribution range.
    basis = [
        GaussianBasisFct(w_avg + centre * w_std, sigma * w_std)
        for centre, sigma in zip(options["centres"], options["sigmas"])
    ]
    log << "- Using %d basis functions:" % (len(basis)) << log.endl
    for b in basis:
        log << "   Centre, width = %1.4f, %1.4f" % (b.centre, b.sigma) << log.endl
    channel_names = ["%s%d" % (channel_name, i) for i in range(len(basis))]

    for cidx, config in enumerate(configs):
        log << log.back << "- Config %d" % cidx << log.flush
        types = config.symbols
        type_coords = []
        delta_Y = delta_Ys[cidx]
        # Attributions exist only for non-excluded atoms, so they are
        # consumed with a separate running index.
        attr_idx = 0
        sum_w = 0.0
        for t in types:
            if t in soap_excls:
                # Excluded centers get no channel weights.
                type_coords.append({})
            else:
                w = delta_Y[attr_idx]
                sum_w += w
                wb = np.array([fct(w) for fct in basis])
                if z is not None:
                    wb = norm_weights(wb, z)
                type_coords.append(
                    {channel_names[i]: wb[i] for i in range(len(basis))})
                attr_idx += 1
        # TO CHECK:
        # >>> log << sum_w << "==" << y_predict[cidx] << log.endl
        config_add_weights(config, type_coords)
    log << log.endl
    return configs
dset.save(output_file) return dset def evaluate_kernel(dset, options, output_file): kernel = soap.Kernel(options) symmetric = True K = kernel.evaluate(dset, dset, symmetric, "float64") z = 1./np.sqrt(K.diagonal()) K = K*np.outer(z,z) np.save(output_file, K) log << log.mg << "Saved kernel =" << log.endl log << K << log.endl return K if __name__ == "__main__": configs = soap.tools.io.read('data/structures.xyz') # Compute SOAP descriptors options = soap.soapy.configure_default() dset = evaluate_soap(configs, options, 'data/structures.soap') # Compute molecular kernel dset = soap.DMapMatrixSet("data/structures.soap") kernel_options = soap.Options() kernel_options.set("basekernel_type", "dot") kernel_options.set("base_exponent", 3.) kernel_options.set("base_filter", False) kernel_options.set("topkernel_type", "average") K = evaluate_kernel(dset, kernel_options, 'data/kernel.npy')