Exemple #1
0
def gen_representations(data):
    """Build FCHL-ACSF representations and gradients for every entry of *data*.

    *data* is indexed with the keys ``"Z"``, ``"R"`` and ``"E"`` (presumably
    nuclear charges, coordinates and energies -- confirm against the caller).

    Returns a tuple ``(reps, dreps, nuclear_charges, energies)``: the stacked
    representations, the stacked gradients, ``data["Z"].tolist()`` and
    ``data["E"]`` flattened.
    """
    charges = data["Z"]

    # Pad to the largest molecule and use exactly the charges that occur.
    max_atoms = max(len(row) for row in charges)
    elements = sorted(set(charges.reshape(-1).tolist()))

    print("max_atoms", max_atoms)
    print("elements", elements)

    reps, dreps = [], []
    n_entries = len(data["E"])

    for idx in tqdm(range(n_entries)):
        rep, drep = generate_fchl_acsf(data["Z"][idx],
                                       data["R"][idx],
                                       elements=elements,
                                       gradients=True,
                                       pad=max_atoms)
        reps.append(rep)
        dreps.append(drep)

    energies = data["E"].flatten()
    nuclear_charges = data["Z"].tolist()

    return np.array(reps), np.array(dreps), nuclear_charges, energies
Exemple #2
0
def csv_to_reps(csv_filename, n=32):
    """Read a random subset of rows from a CSV file and build FCHL-ACSF data.

    The file must provide the columns ``coordinates``, ``nuclear_charges``,
    ``forces`` (Python literals parsed with ``ast.literal_eval``) and
    ``atomization_energy``.

    :param csv_filename: path of the semicolon-separated CSV file
    :param n: number of rows to sample without replacement; clamped to the
        number of rows available
    :return: ``(X, dX, Q, E, F)`` -- representations and their gradients as
        NumPy arrays, a list of nuclear-charge arrays, a flat NumPy array of
        energies and a list of per-molecule force arrays
    """

    # Use a plain one-character separator. A multi-character ``sep`` is
    # interpreted by pandas as a regular expression, and ";|" also matches
    # the empty string, which splits rows between every character on
    # Python >= 3.7.
    df = pandas.read_csv(csv_filename, sep=";")

    max_n = len(df["atomization_energy"])
    n = min(max_n, n)
    index = np.random.choice(max_n, size=n, replace=False)

    print(csv_filename, max_n)

    # HACK: cutoffs are hard-coded here instead of being passed in.
    new_cut = 4.0
    cut_parameters = {
        "rcut": new_cut,
        "acut": new_cut,
    }

    X = []
    dX = []
    Q = []

    E = []
    F = []

    for i in index:

        coordinates = np.array(ast.literal_eval(df["coordinates"][i]))
        nuclear_charges = np.array(ast.literal_eval(df["nuclear_charges"][i]),
                                   dtype=np.int32)

        force = np.array(ast.literal_eval(df["forces"][i]))
        energy = float(df["atomization_energy"][i])

        (rep, drep) = generate_fchl_acsf(nuclear_charges,
                                         coordinates,
                                         gradients=True,
                                         pad=MAX_ATOMS,
                                         elements=[1, 6, 8],
                                         **cut_parameters)

        X.append(rep)
        dX.append(drep)
        Q.append(nuclear_charges)
        E.append(energy)
        F.append(force)

    X = np.array(X)
    dX = np.array(dX)
    E = np.array(E).flatten()

    # F is deliberately returned as a list of per-molecule arrays.
    return X, dX, Q, E, F
Exemple #3
0
def predict(nuclear_charges, coordinates):
    """
    Predict and print energy and forces for one query molecule.

    The trained model (alphas, representations, charges) is loaded from the
    files named by FILENAME_ALPHAS, FILENAME_REPRESENTATIONS and
    FILENAME_CHARGES on every call. The "true" reference values printed next
    to the predictions are hard-coded in this function. Nothing is returned.
    """

    # Trained model
    train_reps = np.load(FILENAME_REPRESENTATIONS)
    train_charges = np.load(FILENAME_CHARGES)
    alpha = np.load(FILENAME_ALPHAS)

    # Representation of the query, padded like the training representations
    rep, drep = generate_fchl_acsf(nuclear_charges,
                                   coordinates,
                                   gradients=True,
                                   pad=train_reps.shape[1])

    # The kernel functions take batches, so wrap the single query
    query_charges = [nuclear_charges]
    query_reps = np.array([rep])
    query_dreps = np.array([drep])

    energy_kernel = get_atomic_local_kernel(train_reps, query_reps,
                                            train_charges, query_charges,
                                            SIGMA)
    force_kernel = get_atomic_local_gradient_kernel(train_reps, query_reps,
                                                    query_dreps,
                                                    train_charges,
                                                    query_charges, SIGMA)

    # Energy: kernel prediction plus the offset from training
    offset = -97084.83100465109
    energy_true = -97086.55524903
    energy_predicted = np.dot(energy_kernel, alpha)[0] + offset

    print("True energy      %16.4f kcal/mol" % energy_true)
    print("Predicted energy %16.4f kcal/mol" % energy_predicted)

    # Forces: one row of three components per atom
    n_atoms = len(nuclear_charges)
    forces_predicted = np.dot(force_kernel, alpha).reshape((n_atoms, 3))

    forces_true = np.array([[-66.66673100, 2.45752385, 49.92224945],
                            [-17.98600137, 68.72856500, -28.82689294],
                            [31.88432927, 8.98739402, -18.11946195],
                            [4.19798833, -31.31692744, 8.12825145],
                            [16.78395377, -24.76072606, -38.99054658],
                            [6.03046276, -7.24928076, -3.88797517],
                            [17.44954868, 0.21604968, 8.56118603],
                            [11.73901551, -19.38200606, 13.26191987],
                            [-3.43256595, 2.31940789, 9.95126984]])

    print("True forces [kcal/mol]")
    print(forces_true)
    print("Predicted forces [kcal/mol]")
    print(forces_predicted)
Exemple #4
0
    def query(self, atoms=None, print_time=True):
        """Predict energy and forces for *atoms* and store them on ``self``.

        Results are written to ``self.energy`` and ``self.forces`` after
        multiplication with the conversion factor below. When *print_time* is
        true, the elapsed time and the unconverted predicted energy are
        printed. Returns None.
        """

        if print_time:
            t_begin = time.time()

        # kcal/mol to eV, and kcal/mol/angstrom to eV/angstrom (same factor)
        kcal_to_ev = 0.0433635093659

        positions = atoms.get_positions()
        numbers = atoms.get_atomic_numbers()
        n_atoms = positions.shape[0]

        cutoff = 4.0
        cut_parameters = {"rcut": cutoff, "acut": cutoff}

        rep, drep = generate_fchl_acsf(numbers,
                                       positions,
                                       gradients=True,
                                       elements=[1, 6, 8],
                                       pad=self.max_atoms,
                                       **cut_parameters)

        # Wrap the single query as a batch in Fortran order
        query_charges = [numbers]
        query_reps = np.array([rep], order="F")
        query_dreps = np.array([drep], order="F")

        energy_kernel = get_atomic_local_kernel(self.repr, query_reps,
                                                self.charges, query_charges,
                                                self.sigma)
        force_kernel = get_atomic_local_gradient_kernel(
            self.repr, query_reps, query_dreps, self.charges, query_charges,
            self.sigma)

        # Energy
        energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
        self.energy = energy_predicted * kcal_to_ev

        # Forces, one row per atom
        forces_predicted = np.dot(force_kernel, self.alphas)
        forces_predicted = forces_predicted.reshape((n_atoms, 3))
        self.forces = forces_predicted * kcal_to_ev

        if print_time:
            elapsed = time.time() - t_begin
            print("qml query {:7.3f}s {:10.3f} ".format(
                elapsed, energy_predicted))
Exemple #5
0
    def _get_rep(self, confid):
        """Return the representation of conformer *confid*, computing it once.

        Computed representations are stored in ``self._rep_cache`` keyed by
        *confid*; the geometry is read from
        ``self._dataset['conformers'][confid]['geo']``.
        """
        cache = self._rep_cache
        if confid in cache:
            return cache[confid]

        geometry = self._dataset['conformers'][confid]['geo']
        coords = np.array(geometry).reshape(-1, 3)
        n_atoms = len(self._charges)
        cache[confid] = generate_fchl_acsf(self._charges, coords, pad=n_atoms)

        return cache[confid]
Exemple #6
0
 def _is_duplicate(self, haystack, needle):
     """ Accurate, yet expensive comparison of geometry needle against the conformers in haystack.

     Returns False when haystack yields no representations; otherwise returns
     the result of get_global_kernel between needle and every conformer
     (NOTE(review): a kernel result, not a boolean -- confirm callers apply
     their own threshold).
     """
     n_atoms = len(self._charges)
     needle_rep = generate_fchl_acsf(self._charges, needle, pad=n_atoms)
     known_reps = [self._get_rep(confid) for confid in haystack]
     if not known_reps:
         return False

     needle_charges = np.array([self._charges])
     known_charges = np.array([list(self._charges)] * len(known_reps))
     return get_global_kernel(np.array([needle_rep]), np.array(known_reps),
                              needle_charges, known_charges,
                              QML_FCHL_SIGMA)
Exemple #7
0
def get_representation(atoms, coordinates, **kwargs):
    """
    Return the FCHL-ACSF representation of a single molecule.

    atoms -- passed to generate_fchl_acsf as first argument
    coordinates -- passed to generate_fchl_acsf as second argument
    max_atoms -- optional keyword; padding size, defaults to len(atoms)
    """
    default_pad = len(atoms)
    pad_size = kwargs.get("max_atoms", default_pad)

    return generate_fchl_acsf(atoms, coordinates, pad=pad_size)
    def query(self, atoms=None):
        """Predict energy and forces for *atoms* and store them on ``self``.

        The representation is generated with ``self.parameters``. Results are
        written to ``self.energy`` and ``self.forces`` after multiplication
        with the conversion factor below. When ``self.debug`` is true, the
        elapsed time and the unconverted energy are printed. Returns None.
        """

        if self.debug:
            t_begin = time.time()

        # kcal/mol to eV, and kcal/mol/angstrom to eV/angstrom (same factor)
        kcal_to_ev = 0.0433635093659

        positions = atoms.get_positions()
        numbers = atoms.get_atomic_numbers()
        n_atoms = positions.shape[0]

        # Representation and gradient for the query molecule
        rep, drep = generate_fchl_acsf(numbers,
                                       positions,
                                       gradients=True,
                                       **self.parameters)

        # Wrap the single query as a batch in Fortran order
        query_charges = [numbers]
        query_reps = np.array([rep], order="F")
        query_dreps = np.array([drep], order="F")

        energy_kernel = get_atomic_local_kernel(self.repr, query_reps,
                                                self.charges, query_charges,
                                                self.sigma)
        force_kernel = get_atomic_local_gradient_kernel(
            self.repr, query_reps, query_dreps, self.charges, query_charges,
            self.sigma)

        # Energy
        energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
        self.energy = energy_predicted * kcal_to_ev

        # Forces, one row per atom
        forces_predicted = np.dot(force_kernel, self.alphas)
        forces_predicted = forces_predicted.reshape((n_atoms, 3))
        self.forces = forces_predicted * kcal_to_ev

        if self.debug:
            elapsed = time.time() - t_begin
            print("fchl19 query {:7.3f}s {:10.3f} ".format(
                elapsed, energy_predicted))
def read_csv_file(filename, n=32, parameters=DEFAULT_PARAMETERS):
    """
    Sample rows from a semicolon-separated CSV file and build FCHL-ACSF data.

    At most *n* rows are drawn at random without replacement. *parameters* is
    forwarded as keyword arguments to generate_fchl_acsf.

    Returns five lists, one entry per sampled row: representations, their
    gradients, nuclear charges, energies and forces.
    """

    df = pd.read_csv(filename, sep=";")

    n_rows = len(df["atomization_energy"])
    n = min(n_rows, n)
    sampled_rows = np.random.choice(n_rows, size=n, replace=False)

    representations, d_representations = [], []
    charges, energies, forces = [], [], []

    for row in sampled_rows:

        # Geometry columns hold Python literals stored as text
        coordinates = np.array(ast.literal_eval(df["coordinates"][row]))
        nuclear_charges = np.array(
            ast.literal_eval(df["nuclear_charges"][row]), dtype=np.int32)
        # Parsed but not used further below
        atomtypes = ast.literal_eval(df["atomtypes"][row])

        # Properties
        force = np.array(ast.literal_eval(df["forces"][row]))
        energy = float(df["atomization_energy"][row])

        rep, drep = generate_fchl_acsf(nuclear_charges,
                                       coordinates,
                                       gradients=True,
                                       **parameters)

        representations.append(rep)
        d_representations.append(drep)
        charges.append(nuclear_charges)
        energies.append(energy)
        forces.append(force)

    return representations, d_representations, charges, energies, forces
Exemple #10
0
def get_data_from_file(filename, n=100):
    """Sample entries from a NumPy archive and build FCHL-ACSF data.

    The archive is indexed with the keys ``"E"``, ``"F"``, ``"R"`` and
    ``"z"``; the same ``data["z"]`` charges are used for every sampled entry.

    :param filename: path passed to ``np.load``
    :param n: number of entries to sample without replacement; clamped to
        the number of entries available
    :return: ``(X, dX, Q, E, F)`` -- representations, their gradients,
        a list of nuclear charges, flattened energies and forces
    """

    data = np.load(filename)

    X = []
    dX = []
    Q = []

    E = []
    F = []

    max_n = len(data["E"])

    # Sampling without replacement fails when asked for more entries than
    # exist, so never request more than are available.
    n = min(max_n, n)
    index = np.random.choice(max_n, size=n, replace=False)

    nuclear_charges = data["z"]

    for i in index:

        coordinates = data["R"][i]

        (rep, drep) = generate_fchl_acsf(nuclear_charges,
                                         coordinates,
                                         gradients=True,
                                         pad=MAX_ATOMS,
                                         elements=[1, 6, 8])

        X.append(rep)
        dX.append(drep)
        Q.append(nuclear_charges)
        E.append(data["E"][i])
        F.append(data["F"][i])

    X = np.array(X)
    dX = np.array(dX)
    E = np.array(E).flatten()
    F = np.array(F)

    return X, dX, Q, E, F
Exemple #11
0
    def get_potential_energy(self, atoms=None, force_consistent=False):
        """Return the predicted energy of *atoms* multiplied by convback_E.

        *force_consistent* is accepted but not used by this implementation.
        """
        charges = atoms.get_atomic_numbers()

        rep = generate_fchl_acsf(charges,
                                 atoms.get_positions(),
                                 gradients=False,
                                 pad=self.nAtoms)

        # The kernel function takes batches, so wrap the single query.
        Xs = np.array([rep])
        Qs = [charges]

        Kse = get_atomic_local_kernel(self.X, Xs, self.Q, Qs, self.sigmas)

        # Extract the single predicted value with .item(): calling float() on
        # an array with ndim > 0 is deprecated in NumPy >= 1.25.
        prediction = np.asarray(np.dot(Kse, self.alphas)).item()
        energy = float(prediction) * convback_E

        return energy
Exemple #12
0
    def _repr_wrapper(frame,
                      elements,
                      nRs2=24,
                      nRs3=20,
                      nFourier=1,
                      eta2=0.32,
                      eta3=2.7,
                      zeta=np.pi,
                      rcut=8.0,
                      acut=8.0,
                      two_body_decay=1.8,
                      three_body_decay=0.57,
                      three_body_weight=13.4,
                      stride=1):
        """Return an element-resolved FCHL-ACSF representation of *frame*.

        Each atom's representation is placed in the slot of its own element
        (position of its charge in *elements*) and all other slots stay zero;
        the slots are then flattened into one row per atom. *stride* is
        accepted but not used here.
        """
        nuclear_charges = frame.get_atomic_numbers()
        coordinates = frame.get_positions()

        rep = generate_fchl_acsf(nuclear_charges,
                                 coordinates,
                                 elements,
                                 nRs2=nRs2,
                                 nRs3=nRs3,
                                 nFourier=nFourier,
                                 eta2=eta2,
                                 eta3=eta3,
                                 zeta=zeta,
                                 rcut=rcut,
                                 acut=acut,
                                 two_body_decay=two_body_decay,
                                 three_body_decay=three_body_decay,
                                 three_body_weight=three_body_weight,
                                 pad=False,
                                 gradients=False)

        n_atoms, n_features = rep.shape[0], rep.shape[1]
        expanded = np.zeros((n_atoms, len(elements), n_features))

        for atom_idx, charge in enumerate(nuclear_charges):
            # First position in `elements` that equals this atom's charge
            slot = np.where(np.equal(charge, elements))[0][0]
            expanded[atom_idx, slot] = rep[atom_idx]

        return expanded.reshape(n_atoms, -1)
Exemple #13
0
    def get_forces(self, atoms=None):
        """Return predicted forces for *atoms*, scaled by convback.

        The raw kernel prediction is also stored in ``self.fYs``; the
        returned array is that prediction reshaped to ``(self.nAtoms, 3)``.
        """
        rep, drep = generate_fchl_acsf(atoms.get_atomic_numbers(),
                                       atoms.get_positions(),
                                       gradients=True,
                                       pad=self.nAtoms)

        # The kernel function takes batches, so wrap the single query.
        Xs = np.array([rep])
        dXs = np.array([drep])
        Qs = [atoms.get_atomic_numbers()]

        Ks = get_atomic_local_gradient_kernel(self.X, Xs, dXs, self.Q, Qs,
                                              self.sigmas)
        self.fYs = np.dot(Ks, self.alphas)

        return self.fYs.reshape((self.nAtoms, 3)) * convback
Exemple #14
0
#from tutorial_data_2files import compounds
#from tutorial_data_2files import energy_cc2
#from tutorial_data_2files import energy_delta

if __name__ == "__main__":

    # Generate an FCHL-ACSF representation for every compound and collect
    # the nuclear charges alongside.
    print('Generating representations')
    Qall = []
    for mol in compounds:
        mol.representation = generate_fchl_acsf(mol.nuclear_charges,
                                                mol.coordinates,
                                                gradients=False,
                                                pad=33,
                                                elements=[1, 6, 7, 8])
        Qall.append(mol.nuclear_charges)

    # Stack all representations into one array
    X = np.array([mol.representation for mol in compounds])

    # Split into training/validation and final test; both sizes currently
    # equal the full number of compounds.
    N_train_val = N_final_test = len(compounds)
    X_train_val = X[:N_train_val]
    Q_train_val = Qall[:N_train_val]
    Y_train_val = energy_cc2[:N_train_val]
Exemple #15
0
    def _do_workpackage(self, molname, dihedrals, resolution):
        """Scan dihedral angles of *molname* and collect unique conformers.

        For every combination of scan angles a constrained geometry is built
        and optimised. Combinations that fail, give a non-numeric energy or
        end up as a different molecule (SMILES mismatch) are skipped. A
        geometry is also rejected when an accepted one has an energy within
        ENERGY_THRESHOLD and a kernel similarity above QML_FCHL_THRESHOLD.

        :return: ``(results, bytecost)`` where *results* is a dict with the
            keys ``mol``, ``dih``, ``res``, ``geo`` and ``ene``
        """
        ndih = len(dihedrals)
        start, step, n_steps = self._clockwork(resolution)
        scanangles = np.arange(start, start + step * n_steps, step)

        # fetch input
        self._sdfstr, self._torsions, self._bonds, self._smiles, bytecost = _fetch_problem_description(
            self._connection, molname)
        # NOTE(review): hits is the cache-wide hit count, not per-call.
        if _fetch_problem_description.cache_info().hits > 0:
            bytecost = 0

        element_charges = {"H": 1, "C": 6, "N": 7, "O": 8}

        accepted_geometries = []
        accepted_energies = []
        accepted_reps = []
        for angles in it.product(scanangles, repeat=ndih):
            # Failures are skipped on purpose (best effort), but only regular
            # exceptions: KeyboardInterrupt/SystemExit must still stop the
            # scan.
            try:
                xyzfile, atoms, coordinates = self._get_classical_constrained_geometry(
                    dihedrals, angles)
                geometry, energy = self._xtbgeoopt(xyzfile, 0)
            except Exception:
                continue
            try:
                energy = float(energy)
            except ValueError:
                continue

            # require same molecule
            try:
                newsmiles = self._get_smiles(geometry)
            except Exception:
                continue
            if newsmiles != self._smiles:
                continue

            # check for similar energies in list
            compare_required = np.where(
                np.abs(np.array(accepted_energies) -
                       energy) < ENERGY_THRESHOLD)[0]
            charges = [element_charges[_] for _ in atoms]
            rep = generate_fchl_acsf(charges, coordinates, pad=len(atoms))
            include = True
            if len(compare_required) > 0:
                # Only compare against conformers with a similar energy.
                sim = get_global_kernel(
                    np.array([rep]),
                    np.array(accepted_reps)[compare_required],
                    np.array([charges]),
                    np.array([charges] * len(compare_required)),
                    QML_FCHL_SIGMA,
                )
                if np.max(sim) > QML_FCHL_THRESHOLD:
                    include = False
            if include:
                accepted_energies.append(energy)
                accepted_geometries.append(self._condense_geo(geometry))
                accepted_reps.append(rep)

        results = {}
        results["mol"] = molname
        results["dih"] = dihedrals
        results["res"] = resolution
        results["geo"] = accepted_geometries
        results["ene"] = accepted_energies
        return results, bytecost
def train():
    """Fit an energy-and-force kernel model on the "train" set and save it.

    Steps, in order:
      1. read properties with ``get_properties("train")`` and the matching
         geometries from ``xyz/<name>.xyz``;
      2. generate FCHL-ACSF representations and gradients;
      3. build the energy kernel and the gradient kernel;
      4. solve for the regression coefficients with ``svd_solve``;
      5. save representations, coefficients and charges as ``.npy`` files;
      6. print training-set MAE and linear-regression statistics.

    Returns None.
    """
    data = get_properties("train")

    SIGMA = 2.5

    mols = []
    for name in sorted(data.keys()):
        mol = qml.Compound()
        mol.read_xyz("xyz/" + name + ".xyz")

        # Associate a property (heat of formation) with the object
        mol.properties = data[name][0]
        mols.append(mol)

    shuffle(mols)

    # REPRESENTATIONS
    print("\n -> calculate representations")
    start = time()
    x = []
    disp_x = []
    forces = []
    e = []
    q = []

    for mol in mols:
        (x1, dx1) = generate_fchl_acsf(mol.nuclear_charges,
                                       mol.coordinates,
                                       gradients=True,
                                       pad=23,
                                       elements=[1, 6, 7, 8, 16, 17])

        e.append(mol.properties)
        # Look up the data key by stripping 4 leading and 4 trailing
        # characters from mol.name (presumably "xyz/" and ".xyz" -- confirm).
        forces.append(data[(mol.name)[4:-4]][1])
        x.append(x1)
        disp_x.append(dx1)
        q.append(mol.nuclear_charges)

    X_train = np.array(x)
    F_train = np.array(forces)
    F_train *= -1
    E_train = np.array(e)
    dX_train = np.array(disp_x)
    Q_train = q

    # Train on mean-centred energies.
    # NOTE(review): E_mean is not saved anywhere in this function.
    E_mean = np.mean(E_train)
    E_train -= E_mean

    F_train = np.concatenate(F_train)

    end = time()

    print(end - start)
    print("")
    print(" -> calculating Kernels")

    start = time()
    Kte = get_atomic_local_kernel(X_train, X_train, Q_train, Q_train, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X_train, X_train, dX_train, Q_train,
                                          Q_train, SIGMA)

    # Stack energy rows on top of force rows, targets in the same order
    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E_train, F_train.flatten()))
    end = time()
    print(end - start)
    print("")

    print("Alphas operator ...")
    start = time()
    alpha = svd_solve(C, Y, rcond=1e-12)
    end = time()
    print(end - start)
    print("")

    print("save X")
    np.save('X_active_learning.npy', X_train)

    print("save alphas")
    np.save('alphas_active_learning.npy', alpha)

    print("save Q")
    np.save('Q_active_learning.npy', Q_train)

    eYt = np.dot(Kte, alpha)
    fYt = np.dot(Kt, alpha)

    # linregress returns the correlation coefficient r; square it so the
    # printed value matches its "r^2" label.
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E_train, eYt)
    print("TRAINING ENERGY   MAE = %10.4f  slope = %10.4f  intercept = %10.4f  r^2 = %9.6f" % \
            (np.mean(np.abs(E_train - eYt)), slope, intercept, r_value ** 2))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F_train.flatten(), fYt.flatten())
    print("TRAINING FORCE    MAE = %10.4f  slope = %10.4f  intercept = %10.4f  r^2 = %9.6f" % \
             (np.mean(np.abs(F_train.flatten() - fYt.flatten())), slope, intercept, r_value ** 2))
Exemple #17
0
def predict_only():
    """Predict and print energy and forces for one hard-coded molecule.

    The trained model is loaded from ``data/training_*.npy``; the molecule,
    the kernel width, the energy offset and the "true" reference values are
    all hard-coded below.
    """

    # Trained model
    train_reps = np.load("data/training_X.npy")
    train_charges = np.load("data/training_Q.npy")
    alpha = np.load("data/training_alphas.npy")

    # Query molecule
    nuclear_charges = np.array([6, 6, 8, 1, 1, 1, 1, 1, 1])
    coordinates = np.array([[0.07230959, 0.61441211, -0.03115568],
                            [-1.26644639, -0.27012846, -0.00720771],
                            [1.11516977, -0.30732869, 0.06414394],
                            [0.10673943, 1.44346835, -0.79573006],
                            [-0.02687486, 1.19350887, 0.98075343],
                            [-2.06614011, 0.38757505, 0.39276693],
                            [-1.68213881, -0.60620688, -0.97804526],
                            [-1.18668224, -1.07395366, 0.67075071],
                            [1.37492532, -0.56618891, -0.83172035]])

    # Representation of the query, padded like the training representations
    rep, drep = generate_fchl_acsf(nuclear_charges,
                                   coordinates,
                                   gradients=True,
                                   pad=train_reps.shape[1])

    # The kernel functions take batches, so wrap the single query
    query_charges = [nuclear_charges]
    query_reps = np.array([rep])
    query_dreps = np.array([drep])

    SIGMA = 10.0

    energy_kernel = get_atomic_local_kernel(train_reps, query_reps,
                                            train_charges, query_charges,
                                            SIGMA)
    force_kernel = get_atomic_local_gradient_kernel(train_reps, query_reps,
                                                    query_dreps,
                                                    train_charges,
                                                    query_charges, SIGMA)

    # Energy: kernel prediction plus the offset from training
    offset = -97084.83100465109
    energy_true = -97086.55524903
    energy_predicted = np.dot(energy_kernel, alpha)[0] + offset

    print("True energy      %16.4f kcal/mol" % energy_true)
    print("Predicted energy %16.4f kcal/mol" % energy_predicted)

    # Forces: one row of three components per atom
    n_atoms = len(nuclear_charges)
    forces_predicted = np.dot(force_kernel, alpha).reshape((n_atoms, 3))

    forces_true = np.array([[-66.66673100, 2.45752385, 49.92224945],
                            [-17.98600137, 68.72856500, -28.82689294],
                            [31.88432927, 8.98739402, -18.11946195],
                            [4.19798833, -31.31692744, 8.12825145],
                            [16.78395377, -24.76072606, -38.99054658],
                            [6.03046276, -7.24928076, -3.88797517],
                            [17.44954868, 0.21604968, 8.56118603],
                            [11.73901551, -19.38200606, 13.26191987],
                            [-3.43256595, 2.31940789, 9.95126984]])

    print("True forces [kcal/mol]")
    print(forces_true)
    print("Predicted forces [kcal/mol]")
    print(forces_predicted)
    for xyz_file in sorted(data.keys()):
        mol = qml.Compound()
        mol.read_xyz(xyz_file)
        mol.properties = data[xyz_file]
        mol.name = xyz_file
        mols.append(mol)

    x = []
    q = []

    list_of_elements = [1, 5, 6, 7, 8, 9, 17, 35]
    for mol in mols:
        x1 = generate_fchl_acsf(mol.nuclear_charges,
                                mol.coordinates,
                                gradients=False,
                                pad=21,
                                elements=list_of_elements)
        x.append(x1)
        q.append(mol.nuclear_charges)

    X = np.array(x)
    Q = q

    K = get_global_kernel(X, X, Q, Q, .64)
    Y = np.asarray([mol.properties for mol in mols])

    lst = [mol.name for mol in mols]
    lst_old = len(lst)

    for i in range(len(Y)):
def repr_wrapper(frame,
                 elements,
                 is_periodic=False,
                 nRs2=24,
                 nRs3=20,
                 nFourier=1,
                 eta2=0.32,
                 eta3=2.7,
                 zeta=np.pi,
                 rcut=8.0,
                 acut=8.0,
                 two_body_decay=1.8,
                 three_body_decay=0.57,
                 three_body_weight=13.4,
                 stride=1):
    '''
    Element-resolved FCHL19 representation of a single frame.

    Each atom's representation is placed in the slot of its own element
    (position of its charge in ``elements``), all other slots stay zero, and
    the slots are flattened into one row per atom. Periodic systems are not
    implemented for FCHL19.

    :param frame: object providing ``get_atomic_numbers()`` and
        ``get_positions()`` (ase Atoms class)
    :param elements: list of unique nuclear charges (atom types)
    :type elements: numpy array
    :param is_periodic: whether the system is periodic; ``True`` raises
    :type is_periodic: bool
    :param nRs2: Number of gaussian basis functions in the two-body terms
    :type nRs2: integer
    :param nRs3: Number of gaussian basis functions in the three-body radial part
    :type nRs3: integer
    :param nFourier: Order of Fourier expansion
    :type nFourier: integer
    :param eta2: Precision in the gaussian basis functions in the two-body terms
    :type eta2: float
    :param eta3: Precision in the gaussian basis functions in the three-body radial part
    :type eta3: float
    :param zeta: Precision parameter of basis functions in the three-body angular part
    :type zeta: float
    :param rcut: forwarded to generate_fchl_acsf as ``rcut``
    :type rcut: float
    :param acut: forwarded to generate_fchl_acsf as ``acut``
    :type acut: float
    :param two_body_decay: exponential decay for the two body function
    :type two_body_decay: float
    :param three_body_decay: exponential decay for the three body function
    :type three_body_decay: float
    :param three_body_weight: relative weight of the three body function
    :type three_body_weight: float
    :param stride: accepted but not used by this function
    :raises NotImplementedError: if ``is_periodic`` is true
    '''

    if is_periodic:
        raise NotImplementedError('Periodic system not implemented!')

    nuclear_charges = frame.get_atomic_numbers()
    coordinates = frame.get_positions()

    rep = generate_fchl_acsf(nuclear_charges,
                             coordinates,
                             elements,
                             nRs2=nRs2,
                             nRs3=nRs3,
                             nFourier=nFourier,
                             eta2=eta2,
                             eta3=eta3,
                             zeta=zeta,
                             rcut=rcut,
                             acut=acut,
                             two_body_decay=two_body_decay,
                             three_body_decay=three_body_decay,
                             three_body_weight=three_body_weight,
                             pad=False,
                             gradients=False)

    n_atoms, n_features = rep.shape[0], rep.shape[1]
    expanded = np.zeros((n_atoms, len(elements), n_features))

    for atom_idx, charge in enumerate(nuclear_charges):
        # First position in `elements` that equals this atom's charge
        slot = np.where(np.equal(charge, elements))[0][0]
        expanded[atom_idx, slot] = rep[atom_idx]

    return expanded.reshape(n_atoms, -1)
Exemple #20
0
def overview_properties_pca():
    """Plot a kernel PCA of the SDF subset, coloured by property value.

    Reads properties from ``data/sdf/subset_properties.csv`` (one float per
    line) and structures from ``data/sdf/subset_structures.sdf``, uses at
    most the first 500 structures, builds FCHL-ACSF representations and a
    local kernel, and saves a two-panel figure (PCA scatter and kernel
    matrix) to ``_tmp_pca_prop.png``. Returns None.
    """

    with open('data/sdf/subset_properties.csv', 'r') as f:
        properties = np.array([float(line) for line in f.readlines()])

    molobjs = cheminfo.read_sdffile("data/sdf/subset_structures.sdf")

    n_items = 500
    n_atoms = 0
    elements = []
    mols_atoms = []
    mols_coord = []

    # Collect geometries, the set of elements and the largest atom count
    for count, molobj in enumerate(molobjs, start=1):

        atoms, coord = cheminfo.molobj_to_xyz(molobj)

        mols_atoms.append(atoms)
        mols_coord.append(coord)

        elements = list(np.unique(elements + list(np.unique(atoms))))
        n_atoms = max(n_atoms, len(atoms))

        if count == n_items:
            break

    properties = properties[:n_items]

    print(elements)
    print(n_atoms)
    print(len(mols_atoms))

    distance_cut = 20.0
    parameters = {
        "pad": n_atoms,
        'nRs2': 22,
        'nRs3': 17,
        'eta2': 0.41,
        'eta3': 0.97,
        'three_body_weight': 45.83,
        'three_body_decay': 2.39,
        'two_body_decay': 2.39,
        "rcut": distance_cut,
        "acut": distance_cut,
        "elements": elements
    }

    representations = np.array([
        generate_fchl_acsf(atoms, coord, **parameters)
        for atoms, coord in zip(mols_atoms, mols_coord)
    ])

    sigma = 10.

    kernel = qml.kernels.get_local_kernel(representations, representations,
                                          mols_atoms, mols_atoms, sigma)

    print(kernel.shape)

    pca = kpca(kernel, n=2)

    # Top panel: PCA scatter; bottom panel: kernel matrix
    fig, (ax_pca, ax_kernel) = plt.subplots(2, 1, figsize=(5, 10))
    sc = ax_pca.scatter(*pca, c=properties)
    fig.colorbar(sc, ax=ax_pca)
    im = ax_kernel.imshow(kernel)
    fig.colorbar(im, ax=ax_kernel)
    fig.savefig("_tmp_pca_prop.png")
Exemple #21
0
    def query(self, atoms=None, print_time=True):
        """Predict energy and forces for *atoms* and store them on ``self``.

        Results are written to ``self.energy`` and ``self.forces``; the
        conversion factors are currently 1.0, so values are stored unscaled.
        When ``self.reducer`` is set, the representation and its gradient are
        projected with it before the kernels are evaluated. Timestamps are
        taken around each stage but nothing is printed. Returns None.
        """

        if print_time:
            start = time.time()

        # Unit conversion is disabled here (kcal/mol to eV would be
        # 0.0433635093659 for both energy and force).
        conv_energy = 1.0
        conv_force = 1.0

        positions = atoms.get_positions()
        numbers = atoms.get_atomic_numbers()
        n_atoms = positions.shape[0]

        # --- representation ---
        rep_start = time.time()

        rep, drep = generate_fchl_acsf(
            numbers,
            positions,
            gradients=True,
            elements=[1, 6, 8],
            pad=self.max_atoms,
        )

        # Wrap the single query as a batch in Fortran order
        query_charges = [numbers]
        query_reps = np.array([rep], order="F")
        query_dreps = np.array([drep], order="F")

        if self.reducer is not None:
            # Project the feature axis (k) onto the reduced basis (l)
            query_reps = np.einsum("ijk,kl->ijl", query_reps, self.reducer)
            query_dreps = np.einsum("ijkmn,kl->ijlmn", query_dreps,
                                    self.reducer)

        rep_end = time.time()

        # --- kernels ---
        kernel_start = time.time()

        energy_kernel = get_atomic_local_kernel(self.repr, query_reps,
                                                self.charges, query_charges,
                                                self.sigma)
        force_kernel = get_atomic_local_gradient_kernel(
            self.repr, query_reps, query_dreps, self.charges, query_charges,
            self.sigma)

        kernel_end = time.time()

        # --- predictions ---
        pred_start = time.time()

        energy_predicted = np.dot(energy_kernel, self.alphas)[0] + self.offset
        self.energy = energy_predicted * conv_energy

        forces_predicted = np.dot(force_kernel, self.alphas)
        forces_predicted = forces_predicted.reshape((n_atoms, 3))
        self.forces = forces_predicted * conv_force

        pred_end = time.time()

        if print_time:
            # Timing output is currently disabled; only the stamp is taken.
            end = time.time()
Exemple #22
0
def prepare_training_data_qmepa890():
    """Build FCHL-ACSF representations for the qmepa890 neutral/protonated pairs.

    Reads ``data/qmepa890/data.csv`` (no header row) and, for every row, loads
    the neutral structure ``structures/<name>.xyz`` and the protonated
    structure ``structures/<name>_+.xyz``, where ``<name>`` is the file ID
    zero-padded to four characters. Both structures are passed through
    ``generate_fchl_acsf`` with the same parameter set.

    In the protonated structure the atom at position ``h_idx - 1`` is
    relabelled with the placeholder nuclear charge ``12``, which is also
    listed in ``parameters["elements"]``.

    Returns:
        A 9-tuple ``(n_representations, p_representations, n_coord_list,
        p_coord_list, n_atoms_list, p_atoms_list, proton_idxs, energies,
        atomization_list)`` where

        * ``n_representations`` / ``p_representations`` are ``np.ndarray``
          stacks of the neutral / protonated representations,
        * ``n_coord_list`` / ``p_coord_list`` are lists of the coordinates
          returned by ``rmsd.get_coordinates_xyz``,
        * ``n_atoms_list`` / ``p_atoms_list`` are lists of the converted atom
          lists (the protonated ones carry the ``12`` marker),
        * ``proton_idxs`` is column 1 of the CSV as an ``np.ndarray``,
        * ``energies`` is the ``DataFrame`` slice of CSV columns 2 onwards,
        * ``atomization_list`` is an ``np.ndarray`` holding, per neutral
          molecule, the sum of the tabulated free-atom energies.

    Raises:
        KeyError: if a neutral structure contains an atom symbol that has no
            entry in the free-atom energy table.
    """

    # Conversion factor applied to the tabulated energies (given in Eh).
    au2kcal = 627.518135759111

    # Table 5. Free atom energies from DFT/PBE0/def2TZVP.
    # H   C   N   O   S
    # Multiplicity    2   3   4   3   3
    # Energy / Eh     -0.501036   -37.8054    -54.5438    -75.0186    -397.974
    atom_energies = {
        "H": -0.501036 * au2kcal,
        "C": -37.8054 * au2kcal,
        "N": -54.5438 * au2kcal,
        "O": -75.0186 * au2kcal,
        "S": -397.974 * au2kcal,
    }

    distance_cut = 20.0
    # NOTE(review): "S" has a free-atom energy above but 16 is not listed in
    # "elements" -- confirm the data set contains no sulfur.
    parameters = {
        "pad": 25,
        'nRs2': 22,
        'nRs3': 17,
        'eta2': 0.41,
        'eta3': 0.97,
        'three_body_weight': 45.83,
        'three_body_decay': 2.39,
        'two_body_decay': 2.39,
        "rcut": distance_cut,
        "acut": distance_cut,
        "elements": [1, 6, 7, 8, 12]
    }

    dirprefix = "data/qmepa890/"
    filename = dirprefix + "data.csv"
    structure_dir = dirprefix + "structures/"

    # CSV columns:
    # 1. File ID (e.g. 0415 means the information pertains to the files `0415.xyz` and `0415_+.xyz`)
    # 2. Index of the proton (in the `XXXX_+.xyz` file listed in the same row)
    # 3. Gas-phase energy of neutral molecule plus thermal corrections from vibrational analysis
    # 4. Gas-phase energy of protonated molecule plus thermal corrections from vibrational analysis
    # 5. Gas-phase energy of neutral molecule
    # 6. Gas-phase energy of protonated molecule
    # 7. Energy of neutral molecule using SMD implicit solvent model
    # 8. Energy of protonated molecule using SMD implicit solvent model
    # 9. PM6 heat-of-formation of neutral molecule using COSMO implicit solvent model
    # 10. PM6 heat-of-formation of protonated molecule using COSMO implicit solvent model

    df = pd.read_csv(filename, sep=",", header=None)

    molecule_names = df.iloc[:, 0]
    proton_idxs = df.iloc[:, 1]
    energies = df.iloc[:, 2:]

    # Protonated state
    p_representations = []
    p_coord_list = []
    p_atoms_list = []

    # Neutral state
    n_representations = []
    n_coord_list = []
    n_atoms_list = []

    atomization_list = []

    for h_idx, name in zip(proton_idxs, molecule_names):

        # The file ID is parsed as a number; restore the four-character name.
        name = str(name).zfill(4)
        print(f"representing {name}")

        # Neutral state
        atoms, coord = rmsd.get_coordinates_xyz(structure_dir + name + ".xyz")

        # Sum of free-atom energies, looked up by atom symbol before the
        # symbols are converted below.
        atomization_list.append(sum(atom_energies[atom] for atom in atoms))

        atoms = [cheminfo.convert_atom(atom) for atom in atoms]
        n_representation = generate_fchl_acsf(atoms, coord, **parameters)
        n_representations.append(n_representation)
        n_coord_list.append(coord)
        n_atoms_list.append(atoms)

        # Protonated state
        atoms, coord = rmsd.get_coordinates_xyz(structure_dir + name +
                                                "_+.xyz")
        atoms = [cheminfo.convert_atom(atom) for atom in atoms]

        # Mark the proton with the placeholder charge 12.
        # NOTE(review): the "- 1" presumes the CSV index is 1-based -- confirm.
        atoms[h_idx - 1] = 12

        p_representation = generate_fchl_acsf(atoms, coord, **parameters)
        # Store the protonated representation (the neutral one was being
        # appended here by mistake, discarding p_representation).
        p_representations.append(p_representation)
        p_coord_list.append(coord)
        p_atoms_list.append(atoms)

    proton_idxs = np.array(proton_idxs)

    n_representations = np.array(n_representations)
    p_representations = np.array(p_representations)

    atomization_list = np.array(atomization_list)

    return n_representations, p_representations, n_coord_list, p_coord_list, n_atoms_list, p_atoms_list, proton_idxs, energies, atomization_list
Exemple #23
0
def prepare_training_data_protonafinity():
    """Build FCHL-ACSF representations for the proton-affinity data set.

    Reads ``data/dataset-proton-affinity/data/pm3_properties.csv`` and, for
    every row, loads the neutral structure ``xyz<MoleculeIdx>_n.xyz`` and the
    protonated structure ``xyz<MoleculeIdx>_<ProtonatedIdx>.xyz`` from the
    ``pm3.cosmo.mop/`` sub-directory. Both structures are passed through
    ``generate_fchl_acsf`` with the same parameter set.

    The protonated site is located from the ``ProtonatedSmiles`` column: the
    single atom with a positive formal charge is relabelled with the
    placeholder nuclear charge ``12``, which is also listed in
    ``parameters["elements"]``.

    Returns:
        A 7-tuple ``(n_representations, p_representations, n_coord_list,
        p_coord_list, n_atoms_list, p_atoms_list, energies)`` where

        * ``n_representations`` / ``p_representations`` are ``np.ndarray``
          stacks of the neutral / protonated representations,
        * ``n_coord_list`` / ``p_coord_list`` are lists of the coordinates
          returned by ``rmsd.get_coordinates_xyz``,
        * ``n_atoms_list`` / ``p_atoms_list`` are lists of the converted atom
          lists (the protonated ones carry the ``12`` marker),
        * ``energies`` is an ``np.ndarray`` whose first row is the
          ``NeutralEnergy`` column and whose second row is the
          ``ProtonatedEnergy`` column.

    Raises:
        AssertionError: if the protonated SMILES cannot be turned into a
            molecule object, or if it does not contain exactly one atom with
            a positive formal charge.
    """

    distance_cut = 20.0
    parameters = {
        "pad": 25,
        'nRs2': 22,
        'nRs3': 17,
        'eta2': 0.41,
        'eta3': 0.97,
        'three_body_weight': 45.83,
        'three_body_decay': 2.39,
        'two_body_decay': 2.39,
        "rcut": distance_cut,
        "acut": distance_cut,
        "elements": [1, 6, 7, 8, 9, 12]
    }

    dirprefix = "data/dataset-proton-affinity/data/"
    structure_dir = dirprefix + "pm3.cosmo.mop/"
    filename = dirprefix + "pm3_properties.csv"
    df = pd.read_csv(filename, sep=",")

    n_rows = df.shape[0]

    # Column names read from the CSV
    col_neuidx = "MoleculeIdx"
    col_proidx = "ProtonatedIdx"
    col_prosmi = "ProtonatedSmiles"
    col_neueng = "NeutralEnergy"
    col_proeng = "ProtonatedEnergy"

    # Collect energies: row 0 is neutral, row 1 is protonated
    energies = np.array([df[col_neueng], df[col_proeng]])

    # Protonated representation
    p_representations = []
    p_coord_list = []
    p_atoms_list = []

    # Neutral representation
    n_representations = []
    n_coord_list = []
    n_atoms_list = []

    for _, row in tqdm.tqdm(df.iterrows(),
                            desc="Preparing FCHL19",
                            total=n_rows,
                            **TQDM_OPTIONS):

        nidx = row[col_neuidx]
        pidx = row[col_proidx]

        nname = f"xyz{nidx}_n.xyz"
        pname = f"xyz{nidx}_{pidx}.xyz"

        # Neutral state
        atoms, coord = rmsd.get_coordinates_xyz(structure_dir + nname)
        atoms = [cheminfo.convert_atom(atom) for atom in atoms]

        n_representation = generate_fchl_acsf(atoms, coord, **parameters)
        n_representations.append(n_representation)
        n_coord_list.append(coord)
        n_atoms_list.append(atoms)

        # Protonated state
        atoms, coord = rmsd.get_coordinates_xyz(structure_dir + pname)
        atoms = [cheminfo.convert_atom(atom) for atom in atoms]

        # Find protonated atom
        smiles = row[col_prosmi]
        molobj = cheminfo.smiles_to_molobj(smiles)

        assert molobj is not None, f"Molobj failed for {smiles}"

        atom_charges = np.array(
            [atom.GetFormalCharge() for atom in molobj.GetAtoms()])
        charged_idxs, = np.where(atom_charges > 0)

        assert len(charged_idxs) == 1, \
            f"Should only be one charged atom in {pname}"

        charged_idx = charged_idxs[0]

        # Mark the charged atom with the placeholder charge 12.
        # NOTE(review): this presumes the atom order of the SMILES molecule
        # matches the atom order of the xyz file -- confirm.
        atoms[charged_idx] = 12

        p_representation = generate_fchl_acsf(atoms, coord, **parameters)
        # Store the protonated representation (the neutral one was being
        # appended here by mistake, discarding p_representation).
        p_representations.append(p_representation)
        p_coord_list.append(coord)
        p_atoms_list.append(atoms)

    n_representations = np.array(n_representations)
    p_representations = np.array(p_representations)

    return n_representations, p_representations, n_coord_list, p_coord_list, n_atoms_list, p_atoms_list, energies