def fchl_linear(atoms_list, coordinates_list, cut_distance=1e6):
    """Compute global symmetric FCHL kernels using the ``"linear"`` kernel.

    Args:
        atoms_list: Per-molecule atom arrays; each entry must expose
            ``shape[0]``, which is used as its atom count.
        coordinates_list: Per-molecule coordinates, handed to
            ``get_fchl_representations`` together with ``atoms_list``.
        cut_distance: Cut-off distance forwarded both to the representation
            generator and to the kernel routine.

    Returns:
        The result of ``fchl.get_global_symmetric_kernels`` for the
        generated representations.

    Raises:
        ValueError: If ``atoms_list`` is empty (``max`` of an empty iterable).
    """
    # Largest atom count in the set; passed on as the representation size
    # (presumably used for padding -- confirm in get_fchl_representations).
    nmax = max(atoms.shape[0] for atoms in atoms_list)

    rep_list = get_fchl_representations(
        atoms_list, coordinates_list, nmax, cut_distance=cut_distance)

    kernels = fchl.get_global_symmetric_kernels(
        rep_list,
        kernel="linear",
        cut_distance=cut_distance,
        alchemy="off",
    )

    # Disabled post-normalisation of the kernel ("Felix stuff"), kept for
    # reference only:
    # diagonal = kernel[np.diag_indices_from(kernel)]
    # new_norm = np.sqrt(diagonal[np.newaxis]*diagonal[:,np.newaxis])
    # kernel /= new_norm
    # diagonal = kernel[np.diag_indices_from(kernel)]
    # new_norm = (diagonal[np.newaxis] + diagonal[:,np.newaxis])/2.0
    # kernel -= new_norm
    # kernel += 1

    return kernels
def unique(atoms, coordinates_list, method="rmsd", threshold=None):
    """Return the indices of the unique entries of ``coordinates_list``.

    Args:
        atoms: Atom description shared by all coordinate sets. Only used by
            the ``"qml"`` method, where it is passed to
            ``fchl.generate_representation``.
        coordinates_list: Sequence of coordinate sets to de-duplicate.
        method: ``"qml"`` builds an FCHL global symmetric kernel over all
            entries and delegates the selection to ``unique_from_kernel``.
            ``"rmsd"`` walks the entries in order and keeps each one for
            which ``exists`` finds no match among those already kept.
            Any other value leaves only index 0 selected.
        threshold: Accepted for interface compatibility.
            NOTE(review): this value is not forwarded to ``exists``; the
            cutoff intended here was 0.004 -- confirm how ``exists`` picks
            its tolerance before wiring it through.

    Returns:
        The indices (into ``coordinates_list``) of the entries considered
        unique. An empty input yields an empty list.
    """
    # len() rather than truthiness: the input may be a numpy array, whose
    # truth value is ambiguous.
    if len(coordinates_list) == 0:
        return []

    if method == "qml":
        replist = np.array([
            fchl.generate_representation(
                coordinates, atoms, max_size=20, cut_distance=10**6)
            for coordinates in coordinates_list
        ])

        # fchl uniqueness, evaluated at a single kernel width.
        sigmas = [0.8]
        fchl_kernels = fchl.get_global_symmetric_kernels(
            replist,
            kernel_args={"sigma": sigmas},
            cut_distance=10**6,
            alchemy="off")

        return unique_from_kernel(fchl_kernels[0])

    # The first entry is always treated as unique.
    idx_list = [0]

    if method == "rmsd":
        unique_list = [coordinates_list[0]]
        for i, coordinates in enumerate(coordinates_list):
            if not exists(unique_list, coordinates):
                unique_list.append(coordinates)
                idx_list.append(i)

    return idx_list
def get_kernel_fchl(rep_alpha, rep_beta):
    """Return the FCHL global kernel between two representation sets.

    When both arguments are the very same object the symmetric kernel
    routine is used; otherwise the general two-set routine is called.
    A single sigma (0.8) is requested, and the single resulting kernel is
    unpacked and returned.
    """
    options = {
        "kernel_args": {"sigma": [0.8]},
        "cut_distance": 10**6,
        "alchemy": "off",
    }

    if rep_alpha is rep_beta:
        result = fchl.get_global_symmetric_kernels(rep_alpha, **options)
    else:
        result = fchl.get_global_kernels(rep_alpha, rep_beta, **options)

    # Exactly one kernel is expected (one sigma); unpacking enforces that.
    (kernel,) = result
    return kernel
def fchl_multiquadratic(atoms_list, coordinates_list, cut_distance=1e6):
    """Compute global symmetric FCHL kernels using the ``"multiquadratic"`` kernel.

    Args:
        atoms_list: Per-molecule atom arrays; each entry must expose
            ``shape[0]``, which is used as its atom count.
        coordinates_list: Per-molecule coordinates, handed to
            ``get_fchl_representations`` together with ``atoms_list``.
        cut_distance: Cut-off distance forwarded both to the representation
            generator and to the kernel routine.

    Returns:
        The result of ``fchl.get_global_symmetric_kernels`` for the
        generated representations, evaluated with ``c = [0.0]``.

    Raises:
        ValueError: If ``atoms_list`` is empty (``max`` of an empty iterable).
    """
    # Largest atom count in the set; passed on as the representation size
    # (presumably used for padding -- confirm in get_fchl_representations).
    nmax = max(atoms.shape[0] for atoms in atoms_list)

    rep_list = get_fchl_representations(
        atoms_list, coordinates_list, nmax, cut_distance=cut_distance)

    kernels = fchl.get_global_symmetric_kernels(
        rep_list,
        kernel="multiquadratic",
        kernel_args={"c": [0.0]},
        cut_distance=cut_distance,
        alchemy="off",
    )

    return kernels
def get_kernel_fchl(rep_alpha, rep_beta, debug=False):
    """Return the FCHL global kernel between two representation sets.

    Args:
        rep_alpha: First set of FCHL representations.
        rep_beta: Second set of FCHL representations. If it is the very same
            object as ``rep_alpha`` the symmetric kernel routine is used.
        debug: When true, print the value of the ``OMP_NUM_THREADS``
            environment variable before computing the kernel.

    Returns:
        The single kernel produced for ``sigma = 0.8``.
    """
    if debug:
        # Use .get so that debugging does not crash with KeyError when the
        # variable is simply not set in the environment.
        print(os.environ.get("OMP_NUM_THREADS", "OMP_NUM_THREADS not set"))

    options = {
        "kernel_args": {"sigma": [0.8]},
        "cut_distance": 10**6,
        "alchemy": "off",
    }

    if rep_alpha is rep_beta:
        kernel, = fchl.get_global_symmetric_kernels(rep_alpha, **options)
    else:
        kernel, = fchl.get_global_kernels(rep_alpha, rep_beta, **options)

    return kernel
def test_krr_fchl_global():
    """Kernel-ridge regression check for the global FCHL kernels.

    Loads the first 100 entries (sorted by filename) listed in
    ``data/hof_qm7.txt``, builds a representation for each molecule, splits
    them into a training set and a test set (one third), and verifies that:

    * the symmetric and the general global kernels agree on the training set,
    * none of the computed kernels contains NaN,
    * the mean absolute error of the prediction lies within 1.0 of 2.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(os.path.join(test_dir, "data", "hof_qm7.txt"))

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data)[:100]:

        # Initialize the qml.Compound() object
        mol = qml.Compound(xyz=os.path.join(test_dir, "qm7", xyz_file))

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Representation produced by generate_representation from the
        # coordinates and nuclear charges, with a cut-off of 1e6.
        mol.representation = generate_representation(
            mol.coordinates, mol.nuclear_charges, cut_distance=1e6)

        mols.append(mol)

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = len(mols) // 3
    n_train = len(mols) - n_test

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 100.0
    llambda = 1e-8

    K_symmetric = get_global_symmetric_kernels(X, [sigma])[0]
    K = get_global_kernels(X, X, [sigma])[0]

    # Check for NaN first: a single NaN must fail the test (np.any, not
    # np.all), and it would otherwise surface as a misleading allclose error.
    assert not np.any(np.isnan(K_symmetric)), \
        "FCHL global symmetric kernel contains NaN"
    assert not np.any(np.isnan(K)), "FCHL global kernel contains NaN"
    assert np.allclose(K, K_symmetric), "Error in FCHL symmetric global kernels"

    # Solve alpha (regularise the diagonal in place before the solve)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_global_kernels(Xs, X, [sigma])[0]
    assert not np.any(np.isnan(Ks)), "FCHL global testkernel contains NaN"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    assert abs(2 - mae) < 1.0, "Error in FCHL global kernel-ridge regression"