def get_representations_fchl(atoms, coordinates_list):
    """Build one FCHL representation per conformer.

    Every entry of ``coordinates_list`` is paired with the same ``atoms``
    and passed to ``fchl.generate_representation`` with ``max_size=30`` and
    ``cut_distance=10**6``.

    Returns a plain list of representations, in input order.
    """
    return [
        fchl.generate_representation(
            xyz, atoms, max_size=30, cut_distance=10**6)
        for xyz in coordinates_list
    ]
def test_krr_fchl_atomic():
    """Check the atomic FCHL kernels against the local symmetric kernel.

    For every pair of molecules the atom-pair kernel is summed and compared
    with the corresponding element of ``get_local_symmetric_kernels``; on the
    diagonal the symmetric atomic kernel must also match the general one.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []
    for xyz_file in sorted(data)[:10]:
        # Initialize the qml.Compound() object
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Representation fed to the FCHL kernels below
        mol.representation = generate_representation(
            mol.coordinates, mol.nuclear_charges, cut_distance=1e6)
        mols.append(mol)

    X = np.array([mol.representation for mol in mols])

    # Set hyper-parameters
    sigma = 2.5

    K = get_local_symmetric_kernels(X, [sigma])[0]
    K_test = np.zeros((len(mols), len(mols)))

    for i, Xi in enumerate(X):
        for j, Xj in enumerate(X):
            K_atomic = get_atomic_kernels(
                Xi[:mols[i].natoms], Xj[:mols[j].natoms], [sigma])[0]
            K_test[i, j] = np.sum(K_atomic)

            # Fail on ANY NaN entry; the previous all()-based check only
            # triggered when the whole kernel was NaN.
            assert not np.any(np.isnan(K_atomic)), \
                "FCHL atomic kernel contains NaN"

            if i == j:
                K_atomic_symmetric = get_atomic_symmetric_kernels(
                    Xi[:mols[i].natoms], [sigma])[0]
                assert np.allclose(K_atomic, K_atomic_symmetric), \
                    "Error in FCHL symmetric atomic kernels"
                assert not np.any(np.isnan(K_atomic_symmetric)), \
                    "FCHL atomic symmetric kernel contains NaN"

    assert np.allclose(K, K_test), "Error in FCHL atomic kernels"
def unique(atoms, coordinates_list, method="rmsd", threshold=None):
    """Return the indices of the unique conformers in ``coordinates_list``.

    @param
        atoms             atom labels/charges shared by all conformers
                          (only used by the "qml" method)
        coordinates_list  sequence of coordinate sets, one per conformer
        method            "qml"  - FCHL global-kernel based uniqueness
                          "rmsd" - keep a conformer when ``exists`` does not
                                   find it among the ones kept so far
                          any other value leaves the result at ``[0]``
        threshold         accepted for interface compatibility; it is not
                          forwarded to ``exists`` or the kernel
                          (NOTE(review): confirm whether it should be)

    @return
        idx_list          indices into ``coordinates_list`` of the unique
                          conformers; ``[]`` for an empty input
    """
    # Nothing to compare: avoid the IndexError on coordinates_list[0].
    # len() is used so numpy arrays are handled as well.
    if len(coordinates_list) == 0:
        return []

    # The first conformer is always considered unique.
    unique_list = [coordinates_list[0]]
    idx_list = [0]

    if method == "qml":
        replist = np.array([
            fchl.generate_representation(
                coordinates, atoms, max_size=20, cut_distance=10**6)
            for coordinates in coordinates_list
        ])

        # fchl uniqueness
        sigmas = [0.8]
        fchl_kernels = fchl.get_global_symmetric_kernels(
            replist,
            kernel_args={"sigma": sigmas},
            cut_distance=10**6,
            alchemy="off")
        idx_list = unique_from_kernel(fchl_kernels[0])

    elif method == "rmsd":
        for i, coordinates in enumerate(coordinates_list):
            if not exists(unique_list, coordinates):
                unique_list.append(coordinates)
                idx_list.append(i)

    return idx_list
def get_representations_fchl(charge_list, coordinates_list, parameters):
    """Generate FCHL representations for a set of molecules.

    ``parameters`` must provide ``'nmax'`` (used for both ``max_size`` and
    ``neighbors``) and ``'cut_distance'``. Molecules are taken pairwise from
    ``charge_list`` and ``coordinates_list``.

    Returns the representations stacked into a numpy array.
    """
    size = parameters['nmax']
    cutoff = parameters['cut_distance']

    representations = [
        qml_fchl.generate_representation(
            xyz, charges, max_size=size, neighbors=size, cut_distance=cutoff)
        for charges, xyz in zip(charge_list, coordinates_list)
    ]
    return np.array(representations)
def get_fchl_representations(atoms_list, coordinates_list, nmax, cut_distance=1e6):
    """Generate FCHL representations from atom lists and coordinates.

    Each atom of every molecule is first mapped through ``get_atom``; the
    resulting per-molecule lists are then paired with ``coordinates_list``
    and passed to ``fchl.generate_representation`` with ``max_size`` and
    ``neighbors`` both set to ``nmax``.

    Returns the representations stacked into a numpy array.
    """
    # All atoms are converted up front, before any representation is built.
    charges_per_molecule = [
        [get_atom(symbol) for symbol in molecule] for molecule in atoms_list
    ]

    representations = [
        fchl.generate_representation(
            xyz, charges, max_size=nmax, neighbors=nmax,
            cut_distance=cut_distance)
        for charges, xyz in zip(charges_per_molecule, coordinates_list)
    ]
    return np.array(representations)
# Load the training set, build one representation per entry and compute the
# regularised local kernel on the first 5000 successfully processed entries.
#
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load training sets that come from a trusted source.
with open('/ihome/ghutchison/dlf57/ml-benchmark/train-ani.pkl', 'rb') as trainset:
    anitrain = pickle.load(trainset)

reps = []
energies = []
for ani in anitrain:
    try:
        coords = ani['coordinates']
        elements = ani['species']
        energy = ani['energy']

        # Map every species label to its entry in the __nuc lookup table
        nuc = [__nuc['{}'.format(element)] for element in elements]

        rep = generate_representation(coords, nuc, max_size=45)
        reps.append(rep)
        energies.append(energy)
    except Exception:
        # Best effort: report the entry that could not be processed and skip
        # it. Catching Exception (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        print(ani['molecule'])
        print(ani['species'])
        print(ani['coordinates'])
        print(ani['energy'])

X = np.array(reps)[:5000]
y = np.array(energies)[:5000]

sigma = 2.5
K = get_local_kernels(X, X, [sigma], cut_distance=10.0)[0]

# Small constant added to the diagonal of the kernel matrix
K[np.diag_indices_from(K)] += 1e-8
def test_fchl_local_periodic():
    """Regression test for the local symmetric FCHL kernel with unit cells.

    Five structures (nuclear charges, cell, fractional coordinates) are turned
    into representations with ``cell=...`` and the resulting kernel is
    compared with stored reference values.
    """
    nuclear_charges = [
        np.array([
            13, 13, 58, 58, 58, 58, 58, 58, 16, 16, 16, 16, 16, 16, 16, 16,
            16, 16, 16, 16, 16, 16, 23, 23
        ]),
        np.array([
            34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 73, 73, 73, 73,
            81, 81, 81, 81
        ]),
        np.array([48, 8, 8, 8, 8, 8, 8, 51, 51]),
        np.array([
            16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
            16, 16, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
            30, 30, 30, 30
        ]),
        np.array([58, 58, 8, 8, 8, 8, 8, 8, 8, 8, 23, 23]),
    ]

    cells = np.array([
        [[1.01113290e+01, -1.85000000e-04, 0.00000000e+00],
         [-5.05582400e+00, 8.75745400e+00, 0.00000000e+00],
         [0.00000000e+00, 0.00000000e+00, 6.15100100e+00]],
        [[9.672168, 0., 0.],
         [0., 3.643786, 0.],
         [0., 0., 14.961818]],
        [[5.28208000e+00, -1.20000000e-05, 1.50000000e-05],
         [-2.64105000e+00, 4.57443400e+00, -3.00000000e-05],
         [1.40000000e-05, -2.40000000e-05, 4.77522000e+00]],
        [[-1.917912, 3.321921, 0.],
         [3.835824, 0., 0.],
         [1.917912, -1.107307, -56.423542]],
        [[3.699168, 3.699168, -3.255938],
         [3.699168, -3.699168, 3.255938],
         [-3.699168, -3.699168, -3.255938]],
    ])

    fractional_coordinates = [
        [
            [0.6666706, 0.33333356, 0.15253127],
            [0.33332896, 0.66666655, 0.65253119],
            [0.14802736, 0.375795, 0.23063888],
            [0.62422269, 0.77225019, 0.23063888],
            [0.22775607, 0.85196133, 0.23063888],
            [0.77224448, 0.14803879, 0.7306388],
            [0.37577687, 0.22774993, 0.7306388],
            [0.8519722, 0.62420512, 0.7306388],
            [0.57731818, 0.47954083, 0.0102715],
            [0.52043884, 0.09777799, 0.01028288],
            [0.90211803, 0.4226259, 0.01032677],
            [0.33335216, 0.6666734, 0.01482035],
            [0.90959766, 0.77585201, 0.30637615],
            [0.86626106, 0.09040873, 0.3063794],
            [0.22415808, 0.13374794, 0.30638265],
            [0.42268138, 0.52045928, 0.51027142],
            [0.47956114, 0.90222098, 0.5102828],
            [0.09788153, 0.57737421, 0.51032669],
            [0.6666474, 0.33332671, 0.51482027],
            [0.09040133, 0.22414696, 0.80637769],
            [0.13373793, 0.90959025, 0.80637932],
            [0.77584247, 0.86625217, 0.80638257],
            [0., 0., 0.05471142],
            [0., 0., 0.55471134],
        ],
        [
            [0.81615001, 0.75000014, 0.00116296],
            [0.52728096, 0.25000096, 0.0993275],
            [0.24582596, 0.75000014, 0.2198563],
            [0.74582658, 0.75000014, 0.28014376],
            [0.02728137, 0.25000096, 0.40067257],
            [0.31615042, 0.75000014, 0.49883711],
            [0.68384978, 0.25000096, 0.50116236],
            [0.97271884, 0.75000014, 0.59932757],
            [0.25417362, 0.25000096, 0.71985637],
            [0.75417321, 0.25000096, 0.78014383],
            [0.47271925, 0.75000014, 0.90067263],
            [0.1838502, 0.25000096, 0.99883717],
            [0.33804831, 0.75000014, 0.07120258],
            [0.83804789, 0.75000014, 0.42879749],
            [0.16195232, 0.25000096, 0.57120198],
            [0.6619519, 0.25000096, 0.92879756],
            [0.98245812, 0.25000096, 0.17113829],
            [0.48245853, 0.25000096, 0.32886177],
            [0.51754167, 0.75000014, 0.67113836],
            [0.01754209, 0.75000014, 0.82886184],
        ],
        [
            [0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
            [3.66334233e-01, 1.96300000e-07, 2.70922493e-01],
            [6.33665197e-01, 6.33666177e-01, 2.70923540e-01],
            [3.62000000e-08, 3.66333081e-01, 2.70923851e-01],
            [6.70000000e-09, 6.33664733e-01, 7.29076149e-01],
            [6.33664135e-01, 9.99998055e-01, 7.29076460e-01],
            [3.66336157e-01, 3.66334260e-01, 7.29077507e-01],
            [3.33333635e-01, 6.66667395e-01, 4.99998953e-01],
            [6.66667720e-01, 3.33333042e-01, 5.00000000e-01],
        ],
        [
            [0.3379644, 0.66203644, 0.01389048],
            [0.02316309, 0.97683587, 0.06948926],
            [0.70833843, 0.29165976, 0.12501892],
            [0.39352259, 0.60647824, 0.18056506],
            [0.74538243, 0.25461577, 0.2361509],
            [0.09722803, 0.90277092, 0.2916841],
            [0.44907919, 0.55092165, 0.34723485],
            [0.8009281, 0.1990701, 0.4027879],
            [0.15278103, 0.84721793, 0.45834308],
            [0.83797345, 0.16202475, 0.51392396],
            [0.52315813, 0.4768427, 0.56947169],
            [0.20833916, 0.7916598, 0.62501748],
            [0.89352691, 0.10647128, 0.68058436],
            [0.57870427, 0.42129656, 0.73611012],
            [0.93056329, 0.06943491, 0.79169347],
            [0.28241704, 0.71758191, 0.84725114],
            [0.63426956, 0.36573128, 0.90280596],
            [0.98611817, 0.01388002, 0.95835813],
            [0., 0., 0.],
            [0.35185434, 0.64814649, 0.05556032],
            [0.03704151, 0.96295744, 0.11112454],
            [0.72221887, 0.27777932, 0.16666022],
            [0.40741437, 0.59258647, 0.22224039],
            [0.75926009, 0.24073811, 0.27778387],
            [0.11111195, 0.888887, 0.33333586],
            [0.46296234, 0.53703849, 0.38888431],
            [0.81480954, 0.18518866, 0.44443222],
            [0.16667233, 0.83332662, 0.500017],
            [0.85185117, 0.14814703, 0.55555711],
            [0.53704217, 0.46295866, 0.61112381],
            [0.22222196, 0.777777, 0.66666587],
            [0.90740847, 0.09258972, 0.72222903],
            [0.59259328, 0.40740756, 0.77777712],
            [0.94444213, 0.05555607, 0.83333],
            [0.29630132, 0.70369764, 0.88890396],
            [0.64815247, 0.35184836, 0.94445471],
        ],
        [
            [0., 0., 0.],
            [0.75000042, 0.50000027, 0.25000015],
            [0.15115386, 0.81961403, 0.33154037],
            [0.51192691, 0.18038651, 0.3315404],
            [0.08154025, 0.31961376, 0.40115401],
            [0.66846017, 0.81961403, 0.48807366],
            [0.08154025, 0.68038678, 0.76192703],
            [0.66846021, 0.18038651, 0.84884672],
            [0.23807355, 0.31961376, 0.91846033],
            [0.59884657, 0.68038678, 0.91846033],
            [0.50000031, 0., 0.50000031],
            [0.25000015, 0.50000027, 0.75000042],
        ],
    ]

    # One representation per structure; the three inputs are walked in step.
    representations = [
        generate_representation(
            frac, charges, cell=cell,
            max_size=36, neighbors=200, cut_distance=7.0)
        for frac, charges, cell in zip(
            fractional_coordinates, nuclear_charges, cells)
    ]
    X = np.array(representations)

    sigmas = [2.5]
    K = get_local_symmetric_kernels(
        X, sigmas, cut_distance=7.0, cut_start=0.7)

    K_ref = np.array([
        [530.03184304, 435.65196293, 198.61245535, 782.49428327,
         263.53562172],
        [435.65196293, 371.35281119, 163.83766549, 643.99777576,
         215.04338938],
        [198.61245535, 163.83766549, 76.12134823, 295.02739281,
         99.89595704],
        [782.49428327, 643.99777576, 295.02739281, 1199.61736141,
         389.31169487],
        [263.53562172, 215.04338938, 99.89595704, 389.31169487,
         133.36920188],
    ])

    assert np.allclose(K, K_ref), "Error in periodic FCHL"
def test_krr_fchl_global():
    """Kernel-ridge regression smoke test for the global FCHL kernels.

    Checks that the symmetric and general global kernels agree, that no
    kernel contains NaN, and that the resulting mean absolute error on the
    held-out third of the molecules lies within 1.0 of 2.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []
    for xyz_file in sorted(data)[:100]:
        # Initialize the qml.Compound() object
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Representation fed to the FCHL kernels below
        mol.representation = generate_representation(
            mol.coordinates, mol.nuclear_charges, cut_distance=1e6)
        mols.append(mol)

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = len(mols) // 3
    n_train = len(mols) - n_test

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 100.0
    llambda = 1e-8

    K_symmetric = get_global_symmetric_kernels(X, [sigma])[0]
    K = get_global_kernels(X, X, [sigma])[0]

    assert np.allclose(K, K_symmetric), \
        "Error in FCHL symmetric global kernels"

    # Fail on ANY NaN entry; the previous all()-based check only triggered
    # when the whole kernel was NaN.
    assert not np.any(np.isnan(K_symmetric)), \
        "FCHL global symmetric kernel contains NaN"
    assert not np.any(np.isnan(K)), "FCHL global kernel contains NaN"

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_global_kernels(Xs, X, [sigma])[0]
    assert not np.any(np.isnan(Ks)), "FCHL global testkernel contains NaN"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    assert abs(2 - mae) < 1.0, "Error in FCHL global kernel-ridge regression"
def calculate(charges_mike, coordinates_mike):
    """Predict energy and forces for one structure with the loaded model.

    ``charges_mike`` holds one charge per atom (N entries).
    ``coordinates_mike`` is consumed element by element into a buffer of
    ``3 * N`` values that is then reshaped to ``(N, 3)``.

    Reads the module-level ``train_representations``,
    ``train_displaced_representations``, ``train_alphas``, ``NMAX``,
    ``CUT_DISTANCE`` and ``DX``. Prints diagnostic output along the way.

    Returns ``(energies[0], forces)``.
    """
    print("before")
    print("mike charge", charges_mike)
    print("mike coord", coordinates_mike)
    print("mike lr", len(charges_mike))
    print("mike lo", len(coordinates_mike))

    n_atoms = len(charges_mike)

    # Copy the incoming values into freshly allocated numpy buffers.
    flat_xyz = np.zeros(n_atoms*3)
    charges = np.zeros(n_atoms, dtype=int)

    for idx, value in enumerate(coordinates_mike):
        flat_xyz[idx] = value

    for idx, value in enumerate(charges_mike):
        charges[idx] = value

    coordinates = flat_xyz.reshape((n_atoms, 3))

    print("charges", charges)
    print("coord", coordinates)
    print("len charge", len(charges))
    print("len coord", len(coordinates))

    representation = generate_representation(
        coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE)
    displaced = generate_displaced_representations(
        coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE,
        dx=DX)

    # get_kernel works on batches, so wrap the single structure.
    query_rep = np.array([representation])
    query_disp = np.array([displaced])

    # generate kernel
    energy_kernels, force_kernels = get_kernel(
        train_representations,
        query_rep,
        train_displaced_representations,
        query_disp)

    energy_kernel = energy_kernels[0]
    force_kernel = force_kernels[0]

    print("kernel shape", force_kernel.shape)

    # predict
    energies = np.dot(energy_kernel.T, train_alphas)
    forces = np.dot(force_kernel.T, train_alphas)

    print("after")
    print(energies[0], forces)

    return energies[0], forces
def main():
    """Predict energy and force for a butane structure and exit.

    Loads the model via ``read_model("data/butane")``, reads
    ``data/butane/butane-1.xyz``, calls ``calculate`` and prints the result.

    NOTE(review): ``quit()`` is called unconditionally right after the
    prints, so every statement below it (the argparse-driven prediction
    path) is unreachable. Decide whether the hard-coded shortcut or the
    command-line path is the intended behavior and remove the other.
    """

    read_model("data/butane")

    atoms, coordinates = rmsd.get_coordinates_xyz("data/butane/butane-1.xyz")
    energy, force = calculate(atoms, coordinates)

    print(energy)
    print(force)

    quit()

    # ------------------------------------------------------------------
    # Everything from here on is dead code (see the note in the docstring).
    # ------------------------------------------------------------------

    description = """ """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--filename', action='store', help='List of molecules', metavar='listfile')
    parser.add_argument('-m', '--model', action='store', help='Output model in npy format', metavar='file')
    args = parser.parse_args()

    # Load model
    # NOTE(review): these are plain locals of main(); nothing here declares
    # them global.
    PARAMETERS = np.load(args.model + ".parameters.npy")
    train_representations = np.load(args.model + ".representations.npy")
    train_displaced_representations = np.load(args.model + ".displaced_representations.npy")
    train_alphas = np.load(args.model + ".alphas.npy")

    # Get molecule filenames
    # NOTE(review): the handle is closed manually; an exception in between
    # would leak it (a with-block would be safer).
    f = open(args.filename, 'r')
    molecules = f.readlines()
    molecules = [mol.strip() for mol in molecules]
    f.close()

    # Directory part of the list file, with a trailing "/".
    # NOTE(review): for a filename without any "/" this evaluates to "/".
    DIRECTORY = args.filename.split("/")
    DIRECTORY = "/".join(DIRECTORY[:-1]) + "/"

    # Init all the rep lists
    list_atoms = []
    list_charges = []
    list_coordinates = []
    list_energies = []
    list_forces = []
    list_rep = []
    list_disp_rep = []
    list_disp_rep5 = []

    # HYPER PARAMETERS
    # PARAMETERS is unwrapped with .item() and read like a dict.
    CUT_DISTANCE = PARAMETERS.item().get('cut_distance')
    KERNEL_ARGS = PARAMETERS.item().get('kernel_args')
    DX = PARAMETERS.item().get('dx')
    NMAX = PARAMETERS.item().get('max_atoms')

    # read coordinates
    for filename in molecules:
        atoms, coordinates = rmsd.get_coordinates_xyz(DIRECTORY + filename + ".xyz")
        charges = [NUCLEAR_CHARGE[atom] for atom in atoms]
        rep = generate_representation(coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE)
        disp_rep = generate_displaced_representations(coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE, dx=DX)
        list_rep.append(rep)
        list_disp_rep.append(disp_rep)
        # Only the first molecule of the list is processed.
        break

    list_rep = np.array(list_rep)
    list_disp_rep = np.array(list_disp_rep)

    # generate kernel
    kernel_energies, kernel_forces = get_kernel(
        train_representations,
        list_rep,
        train_displaced_representations,
        list_disp_rep,
        kernel_args=KERNEL_ARGS,
        dx=DX)

    # Keep only the first element of each returned kernel collection.
    kernel_energies = kernel_energies[0]
    kernel_forces = kernel_forces[0]

    # predict
    energies = np.dot(kernel_energies.T, train_alphas)
    forces = np.dot(kernel_forces.T, train_alphas)

    print(energies)
    print(forces)
def main():
    """Train an energy/force model from a list of molecules and dump it.

    Command line:
        -f/--filename  text file with one molecule name per line; for each
                       name ``<name>.xyz`` and ``<name>.energy`` are read
                       from the directory containing the list file
        -d/--dump      prefix for the written ``.alphas``,
                       ``.representations``, ``.displaced_representations``
                       and ``.parameters`` npy files
        --test         after dumping, re-predict the training set and print
                       whether the energy/force errors are below the
                       thresholds
        --optimize     accepted but currently unused

    The first line of a ``.energy`` file is the energy; each following line
    holds comma-separated force components.
    """

    description = """
    Based on a list of molecules, train a representation-set and alpha set.
    Output the npy files
    """

    import argparse
    import os

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-f', '--filename', action='store', help='List of molecules', metavar='listfile')
    parser.add_argument('-d', '--dump', action='store', help='Output model in npy format', metavar='file')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--optimize', action='store_true')
    args = parser.parse_args()

    # Get molecule filenames
    with open(args.filename, 'r') as f:
        molecules = [line.strip() for line in f]

    # Directory of the list file, with a trailing separator. For a bare
    # filename this is "" (current directory) instead of the filesystem
    # root that the manual split/join produced.
    DIRECTORY = os.path.join(os.path.dirname(args.filename), "")

    # Init all the rep lists
    list_atoms = []
    list_charges = []
    list_coordinates = []
    list_energies = []
    list_forces = []
    list_rep = []
    list_disp_rep = []

    # HYPER PARAMETERS
    CUT_DISTANCE = 1e6
    KERNEL_ARGS = {
        "verbose": False,
        "cut_distance": CUT_DISTANCE,
        "kernel": "gaussian",
        "kernel_args": {
            "sigma": [0.64],
        },
    }
    DX = 0.005

    # read coordinates
    for filename in molecules:
        atoms, coordinates = rmsd.get_coordinates_xyz(DIRECTORY + filename + ".xyz")
        nuclear_charges = [NUCLEAR_CHARGE[atom] for atom in atoms]

        # First line: energy. Remaining lines: comma-separated forces.
        with open(DIRECTORY + filename + ".energy", 'r') as f:
            energy = float(next(f))
            force = [line.split(",") for line in f]
        force = np.array(force, dtype=float)

        list_atoms.append(atoms)
        list_charges.append(nuclear_charges)
        list_coordinates.append(coordinates)
        list_energies.append(energy)
        list_forces.append(force)

    # Calculate NMAX hyperparameter (largest molecule in the set)
    NMAX = np.max([len(x) for x in list_atoms])

    # Save model parameters
    PARAMETERS = {
        "kernel_args": KERNEL_ARGS,
        "cut_distance": CUT_DISTANCE,
        "max_atoms": NMAX,
        "dx": DX
    }

    # Calculate representations
    for charges, coordinates in zip(list_charges, list_coordinates):
        rep = generate_representation(
            coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE)
        disp_rep = generate_displaced_representations(
            coordinates, charges, max_size=NMAX, cut_distance=CUT_DISTANCE,
            dx=DX)
        list_rep.append(rep)
        list_disp_rep.append(disp_rep)

    list_atoms = np.array(list_atoms)
    list_coordinates = np.array(list_coordinates)
    list_energies = np.array(list_energies)
    list_forces = np.array(list_forces)
    list_rep = np.array(list_rep)
    list_disp_rep = np.array(list_disp_rep)

    # Hack, easy way to normalize energies (same molecule)
    avg = np.sum(list_energies) / len(list_energies)
    list_energies -= avg

    # hartree / bohr to hartree / angstrom
    list_forces *= 1.0 / 0.529177249

    # generate train / test views
    view_all = np.array(range(len(molecules)))
    view_train = view_all

    # TODO cross-validation of hyper-parameter optimization

    # generate kernel
    kernel_train_energies, kernel_train_deriv = wrapper.get_kernel(
        list_rep[view_train],
        list_rep[view_train],
        list_disp_rep[view_train],
        list_disp_rep[view_train],
        dx=DX,
        kernel_args=KERNEL_ARGS)

    kernel_train_energies = kernel_train_energies[0]
    kernel_train_deriv = kernel_train_deriv[0]

    # generate alphas
    alphas = wrapper.get_alphas(
        kernel_train_energies,
        kernel_train_deriv,
        list_energies[view_train],
        list_forces[view_train])

    # dump the model
    np.save(args.dump + ".alphas", alphas)
    np.save(args.dump + ".representations", list_rep)
    np.save(args.dump + ".displaced_representations", list_disp_rep)
    np.save(args.dump + ".parameters", PARAMETERS)

    # self test
    # The flag is registered as --test, so argparse stores it as args.test;
    # reading args.selftest raised AttributeError.
    if args.test:
        energy_valid = np.dot(kernel_train_energies.T, alphas)
        force_valid = np.dot(kernel_train_deriv.T, alphas)

        print(
            mae(list_energies[view_train], energy_valid) < 0.08,
            "Error in operator test energy")
        print(
            mae(list_forces[view_train].flatten(), force_valid) < 0.1,
            "Error in operator test force")

    return