コード例 #1
0
    def add_mol_to_training(self, new_system, pun, atom=None, xyz=None):
        """Add one molecule and its per-atom multipole targets to the training set.

        Args:
            new_system: Molecule object. This method reads its ``xyz``,
                ``elements``, ``num_atoms``, ``multipoles`` and ``basis``
                attributes and calls ``initialize_multipoles``,
                ``load_mtp_from_hipart`` and ``compute_basis`` on it.
            pun: Multipole source handed to ``load_mtp_from_hipart``.
            atom: Value compared against the entries of
                ``new_system.elements``; only matching atoms are added.
                ``None`` adds every atom.
            xyz: Fallback geometry passed to ``qml.Compound`` when
                ``new_system.xyz[0]`` is ``None``.

        Returns:
            None.

        Raises:
            ValueError: If both ``new_system.xyz[0]`` and ``xyz`` are ``None``.
            Exception: If the number of multipole rows differs from
                ``new_system.num_atoms`` after loading.
        """
        new_system.initialize_multipoles()

        # Don't build SLATM yet, only add information to mbtypes
        mol = None
        if new_system.xyz[0] is None:
            if xyz is not None:
                mol = qml.Compound(xyz)
            else:
                raise ValueError("Missing xyz file")
        else:
            mol = qml.Compound(new_system.xyz[0])
        self.qml_mols.append(mol)
        # Per-molecule 0/1 mask: 1 marks atoms that take part in training.
        if atom is None:
            self.qml_filter_ele.append([1 for i in range(mol.natoms)])
        else:
            self.qml_filter_ele.append([
                1 if (str(mol.atomtypes[i]) == atom) else 0
                for i in range(mol.natoms)
            ])
        # Nine columns per atom; the slices used below treat them as
        # [0] charge, [1:4] dipole, [4:9] quadrupole.
        new_system.multipoles = np.empty((new_system.num_atoms, 9))
        # Read in multipole moments from txt file
        new_system.load_mtp_from_hipart(pun, rotate=False)
        if len(new_system.multipoles) != new_system.num_atoms:
            raise Exception("Wrong number of charges in %s" % (pun))

        for i in range(len(new_system.elements)):
            ele_i = new_system.elements[i]
            if (ele_i == atom) or atom is None:
                # First time this element is seen: create its containers.
                if ele_i not in self.target_train.keys():
                    self.target_train[ele_i] = []
                    self.descr_train[ele_i] = []
                    self.num_mols_train[ele_i] = 0
                new_target_train = []
                # Rotate system until atom pairs point in all x,y,z directions
                # NOTE(review): the return value is never used; presumably the
                # call is kept because it populates new_system.basis - confirm.
                vec_all_dir = new_system.compute_basis()
                # charge
                new_target_train.append([new_system.multipoles[i][0]])
                # dipole: components multiplied by the transposed basis of atom i
                new_target_train.append(
                    np.dot(new_system.multipoles[i][1:4],
                           new_system.basis[i].T))
                # quadrupole: basis * cart(Q) * basis^T, flattened to 9 values
                tmp = np.dot(
                    np.dot(new_system.basis[i],
                           utils.spher_to_cart(new_system.multipoles[i][4:9])),
                    new_system.basis[i].T).reshape((9, ))
                new_target_train.append(tmp)
                self.target_train[ele_i].append(new_target_train)
            # NOTE(review): this runs once per atom, not once per molecule, and
            # uses ele_i even when that atom was skipped above. With a specific
            # `atom`, a non-matching element that has no entry in
            # num_mols_train raises KeyError here - confirm the intended
            # counting before relying on num_mols_train.
            if atom in new_system.elements or atom is None:
                self.num_mols_train[ele_i] += 1
        self.logger.info("Added file to training set: %s" % new_system)
        return None
コード例 #2
0
ファイル: cml_to_qml.py プロジェクト: rsarm/cheml
def _to_qml(ds, nmol, sublist):
    """Convert molecules of a dataset into ``qml.Compound`` objects.

    Args:
        ds: Dataset object exposing ``list_of_mol`` and ``nmol``.
        nmol: Number of leading molecules to convert when ``sublist`` is
            ``None``. ``None`` means ``ds.nmol``.
        sublist: Optional index (list of indices, index array or boolean
            mask) applied to ``np.array(ds.list_of_mol)``. ``None`` selects
            the first ``nmol`` molecules instead.

    Returns:
        list: One ``qml.Compound`` per selected molecule, filled from the
        molecule's ``natm``, ``symb``, ``z`` and ``R`` attributes.
    """
    # Identity checks are required here: ``== None`` on a NumPy index array
    # compares element-wise, which makes the ``if`` ambiguous and raises.
    if nmol is None:
        nmol = ds.nmol

    if sublist is None:
        selected = ds.list_of_mol[:nmol]
    else:
        selected = np.array(ds.list_of_mol)[sublist]

    list_of_mol = []
    for m in selected:
        qmlc = qml.Compound()

        qmlc.natoms = m.natm
        qmlc.atomtypes = m.symb
        qmlc.nuclear_charges = m.z
        qmlc.coordinates = m.R

        list_of_mol.append(qmlc)

    return list_of_mol
コード例 #3
0
def find_similar_local_environments(filename, element=6):
    """Group atoms of one element that have similar local environments.

    Two atoms are linked when the Euclidean distance between their sorted
    Coulomb-matrix rows is below 1; the linked atoms are then grouped into
    connected components.

    Args:
        filename: Path of the xyz file passed to ``qml.Compound``.
        element: Nuclear charge of the atoms to compare (default 6).

    Returns:
        list: One list of zero-based atom indices per group. Atoms without
        any similar partner are not part of any group. An empty list is
        returned when fewer than two atoms of ``element`` exist.
    """
    c = qml.Compound(xyz=filename)

    # relevant atoms
    atoms = np.where(c.nuclear_charges == element)[0]
    if len(atoms) < 2:
        return []

    # get coulomb matrix
    a = qml.representations.generate_coulomb_matrix(c.nuclear_charges,
                                                    c.coordinates,
                                                    size=c.natoms,
                                                    sorting='unsorted')

    # reconstruct full symmetric matrix from the triangular representation
    s = np.zeros((c.natoms, c.natoms))
    s[np.tril_indices(c.natoms)] = a
    d = np.diag(s)
    s += s.T
    # adding the transpose doubled the diagonal, so restore it
    s[np.diag_indices(c.natoms)] = d

    # find similar sites
    accepted = nx.Graph()
    sorted_elements = [np.sort(row) for row in s[atoms]]
    for i in range(len(atoms)):
        for j in range(i + 1, len(atoms)):
            dist = np.linalg.norm(sorted_elements[i] - sorted_elements[j])
            if dist < 1:
                accepted.add_edge(atoms[i], atoms[j])

    # nx.connected_component_subgraphs was removed in NetworkX 2.4; build the
    # subgraph per component explicitly so the node order stays the same.
    return [
        list(accepted.subgraph(component).nodes)
        for component in nx.connected_components(accepted)
    ]
コード例 #4
0
ファイル: test_slatm.py プロジェクト: sb123456789sb/qml
def test_slatm_representation():
    """Compare SLATM representations of ten QM7 molecules with reference data."""
    path = os.path.dirname(os.path.realpath(__file__))
    print(path)

    files = [
        "qm7/0001.xyz",
        "qm7/0002.xyz",
        "qm7/0003.xyz",
        "qm7/0004.xyz",
        "qm7/0005.xyz",
        "qm7/0006.xyz",
        "qm7/0007.xyz",
        "qm7/0008.xyz",
        "qm7/0009.xyz",
        "qm7/0010.xyz",
    ]
    mols = [qml.Compound(xyz=path + "/" + name) for name in files]

    # The many-body types are derived from the charges of all molecules.
    charges = np.array([compound.nuclear_charges for compound in mols])
    mbtypes = get_slatm_mbtypes(charges)

    for compound in mols:
        compound.generate_slatm(mbtypes)

    X_ref = np.loadtxt(path + "/data/slatm_representation.txt")
    X_qml = np.array([compound.representation for compound in mols])

    assert np.allclose(X_qml, X_ref), "Error in SLATM generation"
コード例 #5
0
def read_xyz_qml(pathway):
    """Read every ``.xyz`` file in a folder into qml compounds.

    Args:
        pathway: Path of the folder containing the ``.xyz`` files, with or
            without a trailing ``/``.

    Returns:
        tuple: ``(compoundlist, ZRN_data)`` where ``compoundlist`` holds one
        ``qml.Compound`` per file and ``ZRN_data`` holds one ``(Z, R, N)``
        tuple per file: nuclear charges as floats, coordinates, and the
        number of atoms as a float.
    """
    compoundlist = []
    ZRN_data = []

    print("iterate over all molecules")
    # Use the `pathway` argument (the original read an unrelated global) and
    # skip directory entries that are not xyz files.
    for xyzfile in os.listdir(pathway):
        if not xyzfile.endswith(".xyz"):
            continue
        xyz_fullpath = os.path.join(pathway, xyzfile)
        compound = qml.Compound(xyz_fullpath)

        print("compound %s" % xyzfile)
        Z = compound.nuclear_charges.astype(float)
        R = compound.coordinates
        N = float(len(Z))

        compoundlist.append(compound)
        # list.append takes a single object, so store the triple as a tuple.
        ZRN_data.append((Z, R, N))

    return (compoundlist, ZRN_data)
コード例 #6
0
def test_krr_cmat():
    """Kernel ridge regression on QM7 heats of formation with Coulomb matrices.

    Trains on 700 molecules, predicts 300, and requires a mean absolute
    error below 6.0.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # PBE0/def2-TZVP heats of formation keyed by xyz file name
    data = get_energies(here + "/data/hof_qm7.txt")

    mols = []
    for name in sorted(data.keys())[:1000]:
        compound = qml.Compound(xyz=here + "/qm7/" + name)
        compound.properties = data[name]
        # Molecular Coulomb matrix sorted by row norm
        compound.generate_coulomb_matrix(size=23, sorting="row-norm")
        mols.append(compound)

    # Deterministic shuffle before splitting
    np.random.seed(666)
    np.random.shuffle(mols)

    n_train, n_test = 700, 300
    training, test = mols[:n_train], mols[-n_test:]

    def collect(attribute, subset):
        # Stack one attribute of every compound in `subset` into an array.
        return np.array([getattr(item, attribute) for item in subset])

    X = collect("representation", training)
    Xs = collect("representation", test)
    Y = collect("properties", training)
    Ys = collect("properties", test)

    # Hyper-parameters
    sigma = 10**(4.2)
    llambda = 10**(-10.0)

    # Training kernel, regularised on the diagonal, then solved for alpha
    K = laplacian_kernel(X, X, sigma)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Prediction kernel and predictions for the held-out molecules
    Ks = laplacian_kernel(X, Xs, sigma)
    Yss = np.dot(Ks.T, alpha)

    mae = np.mean(np.abs(Ys - Yss))

    assert mae < 6.0, "ERROR: Too high MAE!"
コード例 #7
0
def get_data():
    """Generate Coulomb matrices and heats of formation for QM7.

    Returns:
        tuple: ``(X, Y, sigma)`` with the representations of the first 1000
        molecules (sorted by file name), their properties, and the kernel
        width ``10**4.2``.
    """
    base = os.path.dirname(os.path.realpath(__file__))

    # PBE0/def2-TZVP heats of formation keyed by xyz file name
    data = get_energies(base + "/data/hof_qm7.txt")

    mols = []
    for name in sorted(data.keys())[:1000]:
        compound = qml.Compound(xyz=base + "/qm7/" + name)
        compound.properties = data[name]
        # Molecular Coulomb matrix sorted by row norm
        compound.generate_coulomb_matrix(size=23, sorting="row-norm")
        mols.append(compound)

    representations = [compound.representation for compound in mols]
    properties = [compound.properties for compound in mols]

    X = np.array(representations)
    Y = np.array(properties)
    sigma = 10**(4.2)

    return X, Y, sigma
コード例 #8
0
def calc_coloumb_matrices(size, save_path=None):
    '''
    Compute a Coulomb matrix for every structure in ./data/structures.

    :param size: int
        Max num of atoms per molecule found in datasets
    :param save_path: str
        If provided, the computed CMs are pickled to save_path
    :return: dict
        Keys are molecule names (file name up to the first dot), values are
        the matrices returned by inv_tri
    '''
    CMs = {}
    xyz_paths = glob.glob('./data/structures/*.xyz')

    for xyz_path in tqdm.tqdm(xyz_paths):
        base_name = xyz_path.split('/')[-1]
        mol_name = base_name.split('.')[0]

        mol = qml.Compound(xyz=xyz_path)
        # The unsorted representation is a triangular vector; inv_tri expands
        # it back into a 2D matrix.
        mol.generate_coulomb_matrix(size=size, sorting='unsorted')
        CMs[mol_name] = inv_tri(mol.representation, size=size)

    if save_path is None:
        return CMs

    with open(save_path, 'wb') as handle:
        pickle.dump(CMs, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return CMs
コード例 #9
0
ファイル: test_compound.py プロジェクト: sgangoly/qml-1
def test_compound():
    """Check that Compound parses atom types and charges from xyz and exyz files."""
    test_dir = os.path.dirname(os.path.realpath(__file__))

    ref_atomtypes = ['C', 'Cl', 'Br', 'H', 'H']
    ref_charges = [6, 17, 35, 1, 1]

    # Plain xyz
    c = qml.Compound(xyz=test_dir + "/data/compound_test.xyz")

    assert compare_lists(ref_atomtypes, c.atomtypes), "Failed parsing atomtypes"
    assert compare_lists(ref_charges, c.nuclear_charges), "Failed parsing nuclear_charges"

    # Extended xyz: assert on c2 (the original re-checked `c`, so the
    # extended-xyz parser was never actually verified).
    c2 = qml.Compound(xyz=test_dir + "/data/compound_test.exyz")

    assert compare_lists(ref_atomtypes, c2.atomtypes), "Failed parsing atomtypes"
    assert compare_lists(ref_charges, c2.nuclear_charges), "Failed parsing nuclear_charges"
コード例 #10
0
    def add_mol_to_training(self, new_system, ref_ratios, atom=None, xyz=None):
        """Add one molecule and its per-atom reference values to the training set.

        Args:
            new_system: Molecule object; its ``xyz`` and ``elements``
                attributes are read.
            ref_ratios: Per-atom reference values, indexed like
                ``new_system.elements``.
            atom: Value compared against the entries of
                ``new_system.elements``; only matching atoms are added.
                ``None`` adds every atom.
            xyz: Fallback geometry passed to ``qml.Compound`` when
                ``new_system.xyz[0]`` is ``None``. The original body read
                ``xyz`` without defining it, which raised NameError.

        Returns:
            int: Number of atoms added to the training data.

        Raises:
            ValueError: If ``self.mbtypes`` is ``None``, if no geometry is
                available, or if descriptor and target lists for an element
                end up with different lengths.
        """
        if self.mbtypes is None:
            raise ValueError("Missing MBTypes")

        # Init the molecule in qml, preferring the system's own geometry
        geometry = new_system.xyz[0]
        if geometry is None:
            geometry = xyz
        if geometry is None:
            raise ValueError("Missing xyz file")
        mol = qml.Compound(geometry)

        self.qml_mols.append(mol)
        # build slatm representation
        mol.generate_slatm(self.mbtypes, rcut=self.cutoff, local=True)

        natom = 0
        for i, ele in enumerate(new_system.elements):
            if atom is not None and ele != atom:
                continue
            natom += 1
            # reference value and descriptor row for atom i
            self.target_train[ele].append(ref_ratios[i])
            self.descr_train[ele].append(mol.representation[i])

            if len(self.descr_train[ele]) != len(self.target_train[ele]):
                print(len(self.descr_train[ele]))
                print(len(self.target_train[ele]))
                print(self.descr_train[ele])
                print(self.target_train[ele])
                print("Inconsistency in training data")
                raise ValueError("Inconsistency in training data")

        self.logger.info("Added file to training set: %s" % new_system)
        return natom
コード例 #11
0
ファイル: test_arad.py プロジェクト: sb123456789sb/qml
def test_arad():
    """Check symmetric and asymmetric ARAD kernels against each other.

    Covers the local (per-molecule) kernels for the first ten QM7 molecules
    and the atomic kernels for a single molecule, and requires that no
    kernel entry is NaN.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:10]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # ARAD representation of the molecule
        mol.representation = generate_arad_representation(
            mol.coordinates, mol.nuclear_charges)

        mols.append(mol)

    sigmas = [25.0]

    X1 = np.array([mol.representation for mol in mols])

    K_local_asymm = get_local_kernels_arad(X1, X1, sigmas)
    K_local_symm = get_local_symmetric_kernels_arad(X1, sigmas)

    assert np.allclose(K_local_symm,
                       K_local_asymm), "Symmetry error in local kernels"
    # Fail on ANY NaN entry; the previous check only failed when every entry
    # was NaN.
    assert not np.any(np.isnan(
        K_local_asymm)), "ERROR: ARAD local symmetric kernel contains NaN"

    # Smoke test: differently sized inputs must not raise
    K_local_asymm = get_local_kernels_arad(X1[-4:], X1[:6], sigmas)

    molid = 5
    X1 = generate_arad_representation(mols[molid].coordinates,
                                      mols[molid].nuclear_charges,
                                      size=mols[molid].natoms)
    XA = X1[:mols[molid].natoms]

    K_atomic_asymm = get_atomic_kernels_arad(XA, XA, sigmas)
    K_atomic_symm = get_atomic_symmetric_kernels_arad(XA, sigmas)

    assert np.allclose(K_atomic_symm,
                       K_atomic_asymm), "Symmetry error in atomic kernels"
    assert not np.any(np.isnan(
        K_atomic_asymm)), "ERROR: ARAD atomic symmetric kernel contains NaN"

    # Smoke test: a second call must not raise
    K_atomic_asymm = get_atomic_kernels_arad(XA, XA, sigmas)
コード例 #12
0
ファイル: test_fchl.py プロジェクト: syyunn/qml
def test_krr_fchl_atomic():
    """Check that summed FCHL atomic kernels reproduce the local kernel.

    For every pair of the first ten QM7 molecules the atomic kernel is
    summed and compared with the symmetric local kernel; on the diagonal the
    symmetric atomic kernel is compared as well. No kernel entry may be NaN.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:10]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # FCHL representation of the molecule
        mol.representation = generate_representation(
            mol.coordinates, mol.nuclear_charges, cut_distance=1e6)
        mols.append(mol)

    X = np.array([mol.representation for mol in mols])

    # Set hyper-parameters
    sigma = 2.5

    K = get_local_symmetric_kernels(X, [sigma])[0]

    K_test = np.zeros((len(mols), len(mols)))

    for i, Xi in enumerate(X):
        for j, Xj in enumerate(X):

            # Only the first natoms rows of each representation are passed on
            K_atomic = get_atomic_kernels(Xi[:mols[i].natoms],
                                          Xj[:mols[j].natoms], [sigma])[0]
            K_test[i, j] = np.sum(K_atomic)

            # Fail on ANY NaN entry; the previous check only failed when
            # every entry was NaN.
            assert not np.any(
                np.isnan(K_atomic)), "FCHL atomic kernel contains NaN"

            if i == j:
                K_atomic_symmetric = get_atomic_symmetric_kernels(
                    Xi[:mols[i].natoms], [sigma])[0]
                assert np.allclose(K_atomic, K_atomic_symmetric
                                   ), "Error in FCHL symmetric atomic kernels"
                assert not np.any(np.isnan(K_atomic_symmetric)
                                  ), "FCHL atomic symmetric kernel contains NaN"

    assert np.allclose(K, K_test), "Error in FCHL atomic kernels"
コード例 #13
0
ファイル: test_wrappers.py プロジェクト: sb123456789sb/qml
def test_arad_wrapper():
    """Check the ARAD kernel wrappers on 50 QM7 molecules.

    Both the symmetric kernel over the training set and the kernel between
    training and test set must be strictly positive and free of NaN.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies("%s/data/hof_qm7.txt" % test_dir)

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:50]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz="%s/qm7/" % test_dir + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # ARAD representation of the molecule
        mol.generate_arad_representation(size=23)

        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 10
    n_train = 40

    training = mols[:n_train]
    test = mols[-n_test:]

    sigmas = [10.0, 100.0]

    K1 = arad_local_symmetric_kernels(training, sigmas)
    assert np.all(K1 > 0.0), "ERROR: ARAD symmetric kernel negative"
    # Fail on ANY NaN entry; the previous check only failed when every entry
    # was NaN.
    assert not np.any(
        np.isnan(K1)), "ERROR: ARAD symmetric kernel contains NaN"

    K2 = arad_local_kernels(training, test, sigmas)
    assert np.all(K2 > 0.0), "ERROR: ARAD symmetric kernel negative"
    assert not np.any(
        np.isnan(K2)), "ERROR: ARAD symmetric kernel contains NaN"
コード例 #14
0
    def run(self, commandstring):
        """Predict three properties for the geometry in a JSON command string.

        Returns ``"--"`` when the string contains ``"ERROR"``; otherwise the
        three predictions joined by commas.
        """
        if "ERROR" in commandstring:
            return "--"

        payload = json.loads(commandstring)
        geometry_lines = payload["neutralgeometry"].split("\n")
        compound = qml.Compound(xyz=MockXYZ(geometry_lines))
        charges = compound.nuclear_charges

        rep = qml.representations.generate_fchl_acsf(
            charges,
            compound.coordinates,
            gradients=False,
            pad=31,
            elements=[1, 6, 8],
        )

        # Kernel between the stored training representations and this query
        K = qml.kernels.get_local_kernel(self._reps, np.array([rep]), self._Qs,
                                         [charges], 0.128)

        weight_sets = (self._alphas1, self._alphas2, self._alphas3)
        predictions = [np.dot(K, weights)[0] for weights in weight_sets]

        return ",".join(str(value) for value in predictions)
コード例 #15
0
def get_descriptor_and_property(filenames, atype, cutoff):
    """Collect atomic Coulomb-matrix descriptors and properties over files.

    Args:
        filenames: Iterable of structure files.
        atype: ``"all"`` to use every atom, otherwise a key of
            ``qml.data.NUCLEAR_CHARGE`` selecting one atom type.
        cutoff: Passed as ``central_cutoff`` to the descriptor generator.

    Returns:
        tuple: ``(X, Y)`` arrays of descriptors and properties.
    """
    descriptors, targets = [], []

    for filename in filenames:
        # Property to predict for this file
        y = get_properties(filename, atype)

        mol = qml.Compound(filename)
        mol.generate_atomic_coulomb_matrix(central_cutoff=cutoff, size=30)

        x = mol.representation
        if atype != "all":
            # Keep only the rows of atoms with the requested nuclear charge
            wanted = qml.data.NUCLEAR_CHARGE[atype]
            x = x[mol.nuclear_charges == wanted]

        descriptors.extend(x)
        targets.extend(y)

    return np.asarray(descriptors), np.asarray(targets)
コード例 #16
0
def test_representations():
    """Run the representation checks on ten QM7 molecules (0101-0110)."""
    path = os.path.dirname(os.path.realpath(__file__))

    files = [
        "qm7/0101.xyz",
        "qm7/0102.xyz",
        "qm7/0103.xyz",
        "qm7/0104.xyz",
        "qm7/0105.xyz",
        "qm7/0106.xyz",
        "qm7/0107.xyz",
        "qm7/0108.xyz",
        "qm7/0109.xyz",
        "qm7/0110.xyz",
    ]
    mols = [qml.Compound(xyz=path + "/" + name) for name in files]

    # One more than the largest atom count among the molecules
    largest = max(compound.nuclear_charges.size for compound in mols)
    size = largest + 1

    asize = get_asize(mols, 1)

    coulomb_matrix(mols, size, path)
    atomic_coulomb_matrix(mols, size, path)
    eigenvalue_coulomb_matrix(mols, size, path)
    bob(mols, size, asize, path)
コード例 #17
0
    def __init__(self, connection):
        """Load structures and regression weights from ``connection``.

        Reads the gzip-compressed entries ``qml-structures``, ``qml-alphas1``,
        ``qml-alphas2`` and ``qml-alphas3`` and builds one FCHL-ACSF
        representation per stored structure (33 lines each).
        """
        self.connection = connection

        def fetch_text(key):
            # Decompress one stored entry and decode it as ASCII text.
            return gzip.decompress(self.connection.get(key)).decode("ascii")

        def fetch_weights(key):
            # One float per line of the stored entry.
            rows = fetch_text(key).strip().split("\n")
            return np.array([float(row) for row in rows])

        lines = fetch_text("qml-structures").split("\n")
        alphas1 = fetch_weights("qml-alphas1")
        alphas2 = fetch_weights("qml-alphas2")
        alphas3 = fetch_weights("qml-alphas3")

        reps = []
        Qs = []
        for geoidx in range(len(alphas1)):
            first = geoidx * 33
            block_lines = lines[first:first + 33]
            c = qml.Compound(xyz=MockXYZ(block_lines))
            rep = qml.representations.generate_fchl_acsf(
                c.nuclear_charges,
                c.coordinates,
                gradients=False,
                pad=31,
                elements=[1, 6, 8],
            )
            reps.append(rep)
            Qs.append(c.nuclear_charges)

        self._reps = np.array(reps)
        self._Qs = np.array(Qs)
        self._alphas1 = alphas1
        self._alphas2 = alphas2
        self._alphas3 = alphas3
コード例 #18
0
ファイル: nucff.py プロジェクト: andersx/nucff
def test_engrad():
    """Compare the analytic gradient with a central finite difference."""
    dH = 1e-6

    comp = qml.Compound(xyz="water.xyz")
    charges = comp.nuclear_charges

    energy, grad = get_engrad(charges, comp.coordinates)

    def energy_with_shift(atom, axis, shift):
        # Energy after moving one coordinate of one atom by `shift`.
        coords = deepcopy(comp.coordinates)
        coords[atom, axis] += shift
        return get_energy(charges, coords)

    grad_numm = np.zeros(grad.shape)
    for atom, axis in np.ndindex(len(charges), 3):
        e_plus = energy_with_shift(atom, axis, dH)
        e_minus = energy_with_shift(atom, axis, -dH)
        grad_numm[atom, axis] = (e_plus - e_minus) / (2 * dH / BOHR_TO_ANGS)

    assert np.allclose(grad, grad_numm)
コード例 #19
0
ファイル: test_fchl.py プロジェクト: syyunn/qml
def test_krr_fchl_global():
    """Kernel ridge regression with global FCHL kernels on 100 QM7 molecules.

    Checks that the symmetric and general global kernels agree, that no
    kernel entry is NaN, and that the test-set mean absolute error lies
    within 1.0 of 2.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:100]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # FCHL representation of the molecule
        mol.representation = generate_representation(
            mol.coordinates, mol.nuclear_charges, cut_distance=1e6)
        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = len(mols) // 3
    n_train = len(mols) - n_test

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 100.0
    llambda = 1e-8

    K_symmetric = get_global_symmetric_kernels(X, [sigma])[0]
    K = get_global_kernels(X, X, [sigma])[0]

    assert np.allclose(K,
                       K_symmetric), "Error in FCHL symmetric global kernels"
    # Fail on ANY NaN entry; the previous checks only failed when every
    # entry was NaN.
    assert not np.any(
        np.isnan(K_symmetric)), "FCHL global symmetric kernel contains NaN"
    assert not np.any(np.isnan(K)), "FCHL global kernel contains NaN"

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_global_kernels(Xs, X, [sigma])[0]
    assert not np.any(
        np.isnan(Ks)), "FCHL global testkernel contains NaN"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    assert abs(2 - mae) < 1.0, "Error in FCHL global kernel-ridge regression"
コード例 #20
0
ファイル: qml_dr_obabel.py プロジェクト: dbushpw/qml
        hof = float(tokens[0])  #hof is the to predicting value
        #dftb = float(tokens[2])
        #print(i)
        if key == "dft":
            energies[xyz_name[i]] = hof
            #energies[xyz_name[i].split("/")[-1]] = hof

        #elif key=="delta":
        #energies[xyz_name] = hof - dftb
        else:
            energies[xyz_name[i]] = hof

    return energies


# Reference energies, looked up below by compound name.
qm7_dft_energy = get_energies("obabel_dG.txt", key="dft")

# One Compound per entry of `path` (sorted by file name), each tagged with
# its reference energy.
compounds = []
for xyz_name in sorted(os.listdir(path)):
    mol = qml.Compound(xyz=path + xyz_name)
    mol.properties = qm7_dft_energy[mol.name]
    compounds.append(mol)

# Fixed seed so the shuffled order is reproducible.
random.seed(666)
random.shuffle(compounds)

energy_pbe0 = np.array([mol.properties for mol in compounds])
コード例 #21
0

    keys = sorted(data.keys())
    
    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(keys)

    n_test  = 500
    n_train = 1000
    
    n_total = n_test+n_train

    for xyz_file in keys[:n_total]:

        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)
        mol.properties = data[xyz_file]
        mol.representation = generate_input(mol.nuclear_charges, mol.coordinates)
   
        mols.append(mol)


    # Make training and test sets

    training = mols[:n_train]
    test  = mols[-n_test:]

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])
    
コード例 #22
0
import os
import jax_representation as jrep
import matplotlib.pyplot as plt
import qml
import itertools
import numpy as np

#path to xyz files
database = "../Databases/XYZ_diatom/"
distance_vector = []
OM_overlap_vector = []
CM_overlap_vector = []
for xyzfile in os.listdir(database):
    if xyzfile.endswith(".xyz"):
        xyz_fullpath = database + xyzfile
        compound = qml.Compound(xyz_fullpath)
        distance_vector.append(
            xyzfile[10:13]
        )  #distance is given in 'name...i.xyz', retrieve i here
        print('file:', xyzfile, 'distance:', xyzfile[10:13])
        Z = compound.nuclear_charges.astype(float)
        R = compound.coordinates
        N = float(len(Z))

        #Calculate Overlap matrix and determine dimensionality dim
        OM, order = jrep.OM_full_sorted(Z, R, N)
        CM, order = jrep.CM_full_sorted(Z, R, N)
        dim = len(order)

        #loop over OM and add all off-diagonal elements
        OM_overlap = 0
コード例 #23
0
    return energies


if __name__ == "__main__":

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies("hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys()):

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz="qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # This is a Molecular Coulomb matrix sorted by row norm
        mol.generate_coulomb_matrix(size=23, sorting="row-norm")

        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 1000
コード例 #24
0
def train():
    """Fit a kernel model on energies and forces and save it to disk.

    Steps, as performed below:
      1. Load the property table with ``get_properties("train")`` and read
         ``xyz/<name>.xyz`` for every key.
      2. Shuffle the molecules and build FCHL-ACSF representations with
         gradients for each of them.
      3. Build the atomic local kernel and its gradient kernel on the
         training set against itself and solve for the regression
         coefficients with ``svd_solve``.
      4. Save representations, coefficients and nuclear charges to
         ``X_active_learning.npy``, ``alphas_active_learning.npy`` and
         ``Q_active_learning.npy``.
      5. Print timing information and training-set error statistics.

    Returns:
        None.
    """
    #	print(" -> Start training")
    #	start = time()
    #	subprocess.Popen(("python3","model_training.py","train"))
    #	end = time()
    #
    #	total_runtime = end - start
    #
    #	print(" -> Training time: {:.3f}".format(total_runtime))
    #data = get_properties("energies.txt")
    # data[name][0] is used as the energy-like target and data[name][1] as
    # the force-like target below.
    data = get_properties("train")
    mols = []
    # NOTE(review): mols_pred is never used in this function.
    mols_pred = []

    # Kernel width passed to both kernel functions.
    SIGMA = 2.5  #float(sys.argv[1])

    for name in sorted(data.keys()):
        mol = qml.Compound()
        mol.read_xyz("xyz/" + name + ".xyz")

        # Associate a property (heat of formation) with the object
        mol.properties = data[name][0]
        mols.append(mol)

    # NOTE(review): no seed is set in this function, so the order (and the
    # saved arrays) may differ between runs - confirm whether that is wanted.
    shuffle(mols)

    #mols_train = mols[:400]
    #mols_test = mols[400:]

    # REPRESENTATIONS
    print("\n -> calculate representations")
    start = time()
    x = []
    disp_x = []
    f = []
    e = []
    q = []

    for mol in mols:
        # x1: representation, dx1: its gradient (gradients=True)
        (x1, dx1) = generate_fchl_acsf(mol.nuclear_charges,
                                       mol.coordinates,
                                       gradients=True,
                                       pad=23,
                                       elements=[1, 6, 7, 8, 16, 17])

        e.append(mol.properties)
        # mol.name without its first and last four characters is the key into
        # `data`; presumably this strips "xyz/" and ".xyz" - TODO confirm.
        f.append(data[(mol.name)[4:-4]][1])
        x.append(x1)
        disp_x.append(dx1)
        q.append(mol.nuclear_charges)

    X_train = np.array(x)
    F_train = np.array(f)
    # Sign flip of the force-like targets; presumably converts forces into
    # gradients to match the gradient kernel - TODO confirm.
    F_train *= -1
    E_train = np.array(e)
    dX_train = np.array(disp_x)
    Q_train = q

    # Centre the energies; NOTE(review): E_mean is not saved with the model.
    E_mean = np.mean(E_train)

    E_train -= E_mean

    F_train = np.concatenate(F_train)

    end = time()

    print(end - start)
    print("")
    print(" -> calculating Kernels")

    start = time()
    Kte = get_atomic_local_kernel(X_train, X_train, Q_train, Q_train, SIGMA)
    #Kte_test = get_atomic_local_kernel(X_train,  X_test, Q_train,  Q_test,  SIGMA)

    Kt = get_atomic_local_gradient_kernel(X_train, X_train, dX_train, Q_train,
                                          Q_train, SIGMA)
    #Kt_test = get_atomic_local_gradient_kernel(X_train,  X_test, dX_test,  Q_train,  Q_test, SIGMA)

    # Stack energy rows on top of gradient rows so one solve fits both.
    C = np.concatenate((Kte, Kt))

    Y = np.concatenate((E_train, F_train.flatten()))
    end = time()
    print(end - start)
    print("")

    print("Alphas operator ...")
    start = time()
    alpha = svd_solve(C, Y, rcond=1e-12)
    end = time()
    print(end - start)
    print("")

    print("save X")
    np.save('X_active_learning.npy', X_train)
    #    with open("X_mp2.cpickle", 'wb') as f:
    #      cPickle.dump(X_train, f, protocol=2)

    print("save alphas")
    np.save('alphas_active_learning.npy', alpha)
    #    with open("alphas_mp2.cpickle", 'wb') as f:
    #      cPickle.dump(alpha, f, protocol=2)

    print("save Q")
    np.save('Q_active_learning.npy', Q_train)
    #    with open("Q_mp2.cpickle", 'wb') as f:
    #      cPickle.dump(Q_train, f, protocol=2)

    # Predictions on the training set itself.
    eYt = np.dot(Kte, alpha)
    fYt = np.dot(Kt, alpha)
    #eYt_test = np.dot(Kte_test, alpha)
    #fYt_test = np.dot(Kt_test, alpha)

    # NOTE(review): both prints label the last value "r^2" but pass r_value,
    # which scipy.stats.linregress documents as the correlation coefficient
    # (not squared) - confirm which one is intended.
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E_train, eYt)
    print("TRAINING ENERGY   MAE = %10.4f  slope = %10.4f  intercept = %10.4f  r^2 = %9.6f" % \
            (np.mean(np.abs(E_train - eYt)), slope, intercept, r_value ))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F_train.flatten(), fYt.flatten())
    print("TRAINING FORCE    MAE = %10.4f  slope = %10.4f  intercept = %10.4f  r^2 = %9.6f" % \
             (np.mean(np.abs(F_train.flatten() - fYt.flatten())), slope, intercept, r_value ))
コード例 #25
0
import jax_additional_derivative as jader

#what to do?
do_fingerprint_distance = False
do_derivative_calculation = False
do_plot_derivatives = True

#path to xyz files
database = "/home/linux-miriam/Databases/BOB/"
'''define folder of .xyz files'''
names = [database + "BOB1.xyz", database + "BOB2.xyz"]

#which representations?
namelist = ["CM", "EVCM", "BOB", "OM", "EVOM"]

compound1 = qml.Compound(names[0])
compound2 = qml.Compound(names[1])

if do_fingerprint_distance:

    Z1 = compound1.nuclear_charges.astype(float)
    R1 = compound1.coordinates

    Z2 = compound2.nuclear_charges.astype(float)
    R2 = compound2.coordinates

    #calculate difference to reference constitution
    M_CM1 = jrep.CM_full_unsorted_matrix(Z1, R1, size=4)
    M_CM2 = jrep.CM_full_unsorted_matrix(Z2, R2, size=4)

    M_EVCM1 = jrep.CM_ev_unsrt(Z1, R1, N=0, size=4)
コード例 #26
0
#!/usr/bin/env python
from __future__ import print_function
import qml

if __name__ == "__main__":

    # Load qm7/0001.xyz (methane) into a Compound object named mol
    mol = qml.Compound(xyz="qm7/0001.xyz")

    # Build a row-norm sorted coulomb matrix sized for 5 atoms and show it
    mol.generate_coulomb_matrix(size=5, sorting="row-norm")
    print(mol.representation)

    # Build the BoB bags for a compound made of C and H and show them
    mol.generate_bob(size=5, asize={"C": 2, "H": 5})
    print(mol.representation)

    # Show the remaining properties stored in the object, one per line
    for attribute in ("coordinates", "atomtypes", "nuclear_charges", "name",
                      "unit_cell"):
        print(getattr(mol, attribute))
コード例 #27
0
ファイル: test_fchl.py プロジェクト: syyunn/qml
def test_krr_fchl_local():
    """Check the FCHL local kernels and a kernel-ridge regression on them.

    FCHL representations are generated for the first 100 (sorted) molecules
    listed in ``data/hof_qm7.txt``. The symmetric and the general local
    kernel routines must agree and must not contain any NaN. A regression
    is then trained on roughly two thirds of the shuffled molecules and the
    mean absolute error on the remaining third must lie within 1.0 of 2.

    Raises:
        AssertionError: if a kernel contains NaN, the two kernel routines
            disagree, or the regression error is outside the expected window.
    """

    # Test that all kernel arguments work
    kernel_args = {
        "cut_distance": 1e6,
        "cut_start": 0.5,
        "two_body_width": 0.1,
        "two_body_scaling": 2.0,
        "two_body_power": 6.0,
        "three_body_width": 3.0,
        "three_body_scaling": 2.0,
        "three_body_power": 3.0,
        "alchemy": "periodic-table",
        "alchemy_period_width": 1.0,
        "alchemy_group_width": 1.0,
        "fourier_order": 2,
    }

    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data)[:100]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Generate the FCHL representation of the molecule
        mol.generate_fchl_representation(cut_distance=1e6)
        mols.append(mol)

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = len(mols) // 3
    n_train = len(mols) - n_test

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 2.5
    llambda = 1e-8

    K_symmetric = get_local_symmetric_kernels(X, [sigma], **kernel_args)[0]
    K = get_local_kernels(X, X, [sigma], **kernel_args)[0]

    # A single NaN entry must already fail the test, hence any() rather than
    # all(); checked first so that a NaN is reported as such and not as a
    # mismatch between the two kernel routines.
    assert not np.any(
        np.isnan(K_symmetric)), "FCHL local symmetric kernel contains NaN"
    assert not np.any(np.isnan(K)), "FCHL local kernel contains NaN"
    assert np.allclose(K, K_symmetric), "Error in FCHL symmetric local kernels"

    # Solve alpha (regularize the diagonal in place before the solve)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_local_kernels(Xs, X, [sigma], **kernel_args)[0]
    assert not np.any(np.isnan(Ks)), "FCHL local testkernel contains NaN"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    assert abs(2 - mae) < 1.0, "Error in FCHL local kernel-ridge regression"
コード例 #28
0
'''we start with a straight molecule stretching out on the x axis:

      H--C===C--H

to then move both H simultaneously anti-clockwise by an angle phi:
    
                H
               / phi
         C===C.......
        /
        H

'''

# Reference values: nuclear charges (cast to float) and coordinates
compound = qml.Compound(reference)
Z, R = compound.nuclear_charges.astype(float), compound.coordinates

ref_M_EVCM = jrep.CM_ev_unsrt(Z, R, size=4)


# list of eigenvalue vectors; its length is the same as len of name_vector
dZ_eigenvalues = []
# the dimension of the files may vary, so all dimensions are stored here
dimZ_list = []


# four independent empty lists per slot
dZ_slot1_list = [[] for _ in range(4)]
dZ_slot2_list = [[] for _ in range(4)]
dZ_slot3_list = [[] for _ in range(4)]
dZ_slot4_list = [[] for _ in range(4)]
コード例 #29
0
    return energies


"""
Generating dict with binding energies and filename.
"""

if __name__ == "__main__":
    print("\n -> load binding energies")

    # Energy tables for the training and the test split
    data = get_energies("data/trainUrt.txt")
    data2 = get_energies("data/testUrt.txt")
    mols = []
    mols_test = []

    # Same loading recipe for both splits: read the xyz file, attach the
    # tabulated property and collect the compound in the matching list.
    for energy_table, xyz_dir, collected in (
        (data, "data/QM9Train/", mols),
        (data2, "data/QM9Test/", mols_test),
    ):
        for xyz_file in tqdm(sorted(energy_table.keys())):
            mol = qml.Compound()
            mol.read_xyz(xyz_dir + xyz_file)
            mol.properties = energy_table[xyz_file]
            collected.append(mol)

    # Many-body types are derived from the charges of all molecules,
    # training first, then test
    all_charges = [entry.nuclear_charges for entry in mols + mols_test]
    mbtypes = get_slatm_mbtypes(all_charges)

    print("\n -> generate representation")
    for mol in tqdm(mols):
        mol.generate_slatm(mbtypes, local=False)
コード例 #30
0
def test_krr_gaussian_local_cmat():
    """Check the local Gaussian kernel on atomic Coulomb matrices.

    Atomic Coulomb matrices are generated for the first 1000 (sorted)
    molecules listed in ``data/hof_qm7.txt``. After a seeded shuffle, the
    first 200 molecules form the training set and the last 100 the test set.
    The kernels are compared against stored reference files and against the
    ``get_atomic_kernels_gaussian`` wrapper, and the mean absolute error of
    the resulting kernel-ridge regression must lie within 1.0 of 19.0.

    Raises:
        AssertionError: if a kernel deviates from its reference or from the
            wrapper, or the regression error is outside the expected window.
    """

    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.Compound() objects
    mols = []

    for xyz_file in sorted(data)[:1000]:

        # Initialize the qml.Compound() objects
        mol = qml.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Generate the atomic Coulomb matrices, sorted by row norm
        mol.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")

        mols.append(mol)

    # Shuffle molecules (fixed seed keeps the split reproducible)
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 100
    n_train = 200

    training = mols[:n_train]
    test = mols[-n_test:]

    X = np.concatenate([mol.representation for mol in training])
    Xs = np.concatenate([mol.representation for mol in test])

    N = np.array([mol.natoms for mol in training])
    Ns = np.array([mol.natoms for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 724.0
    llambda = 10**(-6.5)

    K = get_local_kernels_gaussian(X, X, N, N, [sigma])[0]
    assert np.allclose(K, K.T), "Error in local Gaussian kernel symmetry"

    K_test = np.loadtxt(test_dir + "/data/K_local_gaussian.txt")
    assert np.allclose(
        K, K_test), "Error in local Gaussian kernel (vs. reference)"

    K_test = get_atomic_kernels_gaussian(training, training, [sigma])[0]
    assert np.allclose(K,
                       K_test), "Error in local Gaussian kernel (vs. wrapper)"

    # Solve alpha (regularize the diagonal in place before the solve)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_local_kernels_gaussian(Xs, X, Ns, N, [sigma])[0]

    Ks_test = np.loadtxt(test_dir + "/data/Ks_local_gaussian.txt")
    # Sometimes a few coulomb matrices differ because of parallel sorting and
    # numerical error, so a strict np.allclose against the reference is too
    # tight. Instead, count the test molecules (kernel rows) with at least
    # one entry off by more than 1e-7 in either direction and require fewer
    # than 5 of them.
    mismatched = np.abs(Ks - Ks_test) > 1e-7
    differences_count = np.unique(np.nonzero(mismatched)[0]).size
    assert differences_count < 5, "Error in local Gaussian kernel (vs. reference)"

    Ks_test = get_atomic_kernels_gaussian(test, training, [sigma])[0]
    assert np.allclose(Ks,
                       Ks_test), "Error in local Gaussian kernel (vs. wrapper)"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    assert abs(19.0 -
               mae) < 1.0, "Error in local Gaussian kernel-ridge regression"