def test_krr_bob():
    """Run Laplacian-kernel ridge regression on QM7 and print the test MAE.

    Loads every molecule listed in ``data/hof_qm7.txt``, builds a Coulomb
    matrix representation for each, trains on 4000 shuffled molecules and
    predicts the last 2000.  The mean absolute error is only printed; no
    assertion is made on its value.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # Maps each xyz filename to its PBE0/def2-TZVP heat of formation
    energies = get_energies(here + "/data/hof_qm7.txt")

    def load_compound(filename):
        """Build one Compound with its property and representation set."""
        compound = qml.data.Compound(xyz=here + "/qm7/" + filename)
        compound.properties = energies[filename]
        # Coulomb matrix with the library's default settings
        compound.generate_coulomb_matrix()
        return compound

    mols = [load_compound(name) for name in sorted(energies.keys())]

    # Fixed seed so the train/test split is reproducible
    np.random.seed(666)
    np.random.shuffle(mols)

    # Training set comes from the front of the list, test set from the back
    n_train, n_test = 4000, 2000
    train_set = mols[:n_train]
    test_set = mols[-n_test:]

    # Representations and target properties
    X = np.array([entry.representation for entry in train_set])
    Xs = np.array([entry.representation for entry in test_set])
    Y = np.array([entry.properties for entry in train_set])
    Ys = np.array([entry.properties for entry in test_set])

    # Hyper-parameters: kernel width and regularisation strength
    sigma = 4000.40
    llambda = 1e-11

    # Regularised training kernel, then solve for the regression weights
    K = laplacian_kernel(X, X, sigma)
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # The train-vs-test kernel is transposed so that rows index test molecules
    Ks = laplacian_kernel(X, Xs, sigma)
    Yss = np.dot(Ks.T, alpha)

    mae = np.abs(Ys - Yss).mean()
    print(mae)
Esempio n. 2
0
def test_arad():
    """Check that symmetric and asymmetric ARAD kernels agree and hold no NaN.

    The first ten molecules (sorted by filename) of ``data/hof_qm7.txt`` are
    given ARAD representations.  For the local, global and atomic kernel
    flavours, the dedicated symmetric routine is compared against the general
    routine called with identical left and right arguments, and the result is
    required to contain no NaN entries.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:10]:

        # Initialize the qml.data.Compound() objects
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # ARAD representation built from coordinates and nuclear charges
        mol.representation = generate_arad_representation(
            mol.coordinates, mol.nuclear_charges)

        mols.append(mol)

    sigmas = [25.0]

    X1 = np.array([mol.representation for mol in mols])

    K_local_asymm = get_local_kernels_arad(X1, X1, sigmas)
    K_local_symm = get_local_symmetric_kernels_arad(X1, sigmas)

    assert np.allclose(K_local_symm,
                       K_local_asymm), "Symmetry error in local kernels"
    # Fail if ANY entry is NaN (the previous check only failed when ALL were)
    assert not np.any(np.isnan(
        K_local_asymm)), "ERROR: ARAD local symmetric kernel contains NaN"

    K_global_asymm = get_global_kernels_arad(X1, X1, sigmas)
    K_global_symm = get_global_symmetric_kernels_arad(X1, sigmas)

    assert np.allclose(K_global_symm,
                       K_global_asymm), "Symmetry error in global kernels"
    assert not np.any(np.isnan(
        K_global_asymm)), "ERROR: ARAD global symmetric kernel contains NaN"

    # Atomic kernels are checked on a single molecule, with the representation
    # regenerated using size equal to that molecule's atom count
    molid = 5
    X1 = generate_arad_representation(mols[molid].coordinates,
                                      mols[molid].nuclear_charges,
                                      size=mols[molid].natoms)
    XA = X1[:mols[molid].natoms]

    K_atomic_asymm = get_atomic_kernels_arad(XA, XA, sigmas)
    K_atomic_symm = get_atomic_symmetric_kernels_arad(XA, sigmas)

    assert np.allclose(K_atomic_symm,
                       K_atomic_asymm), "Symmetry error in atomic kernels"
    assert not np.any(np.isnan(
        K_atomic_asymm)), "ERROR: ARAD atomic symmetric kernel contains NaN"

    # NOTE(review): repeated call whose result is never inspected; presumably a
    # leftover. Kept so the sequence of kernel calls is unchanged -- confirm
    # before removing.
    K_atomic_asymm = get_atomic_kernels_arad(XA, XA, sigmas)
Esempio n. 3
0
def test_nn_bob():
    """Train a ``NeuralNet`` on QM7 Coulomb matrices and print error metrics.

    Every molecule listed in ``data/hof_qm7.txt`` gets a Coulomb matrix
    representation.  The target properties are replaced by the first value
    returned from ``get_mean_atomic_contribution`` before the data is
    shuffled and split into 4000 training and 2000 test molecules.  The
    network is optimised with Adam on an MSE loss for 100000 epochs; every
    1000 epochs the scaled MAE/RMSD on both sets are printed.  Nothing is
    asserted.

    Runs on ``cuda:0`` when CUDA is available and falls back to the CPU
    otherwise, so the test no longer crashes on machines without a GPU.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys()):

        # Initialize the qml.data.Compound() objects
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Coulomb matrix with the library's default settings
        mol.generate_coulomb_matrix()
        mols.append(mol)

    # Swap the raw properties for the per-molecule values in the first return
    # value (presumably energies with the mean atomic contribution removed --
    # TODO confirm against get_mean_atomic_contribution)
    es = np.array([mol.properties for mol in mols])
    fc, _ = get_mean_atomic_contribution(mols, es)

    for i, mol in enumerate(mols):
        mol.properties = fc[i]

    # Shuffle molecules with a fixed seed for a reproducible split
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 2000
    n_train = 4000

    training = mols[:n_train]
    test = mols[-n_test:]

    # List of representations
    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    print(X.shape)
    print(Y.shape)

    # Prefer the GPU but do not require one
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Hidden layer widths passed to NeuralNet
    H, H2 = 50, 10

    # Factor applied to all reported errors (presumably Hartree -> kcal/mol;
    # TODO confirm the units of the input data)
    scale = 627.51

    x = torch.from_numpy(X).to(device, torch.float)
    y = torch.from_numpy(Y.reshape((n_train, 1))).to(device, torch.float)

    xs = torch.from_numpy(Xs).to(device, torch.float)
    ys = torch.from_numpy(Ys.reshape((n_test, 1))).to(device, torch.float)

    model = NeuralNet(X.shape[1], H, H2).to(device)

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(100000):

        yt = model(x)

        if epoch % 1000 == 0:
            # Metrics are for reporting only, so skip autograd bookkeeping
            with torch.no_grad():
                train_err = y - yt
                rmsd = torch.sqrt(train_err.pow(2).sum() / n_train) * scale
                mae = train_err.abs().sum() * scale / n_train

                test_err = model(xs) - ys
                rmsd_s = torch.sqrt(test_err.pow(2).sum() / n_test) * scale
                mae_s = test_err.abs().sum() * scale / n_test
            print(epoch, mae, rmsd, mae_s, rmsd_s)

        loss = criterion(yt, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # First ten training predictions from the last epoch next to their targets
    print(yt[:10] * scale, y[:10] * scale)
Esempio n. 4
0
def test_nn_bob():
    """Fit a one-hidden-layer ReLU network by hand-written gradient descent.

    Every molecule listed in ``data/hof_qm7.txt`` gets an eigenvalue Coulomb
    matrix representation.  After an unseeded shuffle (the split differs from
    run to run), 1000 molecules are used for training and 1000 for
    validation.  Forward pass, backward pass and weight update are written
    out explicitly with tensor operations; the scaled training and
    validation losses are printed at every step.  Nothing is asserted.

    Runs on ``cuda:0`` when CUDA is available and falls back to the CPU
    otherwise, so the test no longer crashes on machines without a GPU.
    """
    import torch

    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys()):

        # Initialize the qml.data.Compound() objects
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Eigenvalue Coulomb matrix representation
        mol.generate_eigenvalue_coulomb_matrix()
        mols.append(mol)

    # Shuffle molecules (deliberately left unseeded, as in the original)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 1000
    n_train = 1000

    training = mols[:n_train]
    test = mols[-n_test:]

    # List of representations
    X = np.array([mol.representation for mol in training])
    Xs = np.array([mol.representation for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    print(X.shape)
    print(Y.shape)

    dtype = torch.float
    # Prefer the GPU but do not require one
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Input width, hidden width and output width of the network
    D_in, H, D_out = X.shape[1], 64, 1

    x = torch.from_numpy(X).to(device, torch.float)
    y = torch.from_numpy(Y.reshape((n_train, 1))).to(device, torch.float)

    xs = torch.from_numpy(Xs).to(device, torch.float)
    ys = torch.from_numpy(Ys.reshape((n_test, 1))).to(device, torch.float)

    # Randomly initialize weights
    w1 = torch.randn(D_in, H, device=device, dtype=dtype)
    w2 = torch.randn(H, D_out, device=device, dtype=dtype)

    learning_rate = 1e-8
    for t in range(5000):
        # Forward pass: compute predicted y
        h = x.mm(w1)
        h_relu = h.clamp(min=0)
        y_pred = h_relu.mm(w2)

        # Summed squared error on the training set
        loss = (y_pred - y).pow(2).sum().item()

        # Same forward pass on the validation set with the current weights
        hval = xs.mm(w1)
        hval_relu = hval.clamp(min=0)
        yval_pred = hval_relu.mm(w2)

        loss2 = (yval_pred - ys).pow(2).sum().item()
        print(t, loss / n_train * 627.51, loss2 / n_test * 627.51)

        # Backprop to compute gradients of w1 and w2 with respect to loss
        grad_y_pred = 2.0 * (y_pred - y)
        grad_w2 = h_relu.t().mm(grad_y_pred)
        grad_h_relu = grad_y_pred.mm(w2.t())
        grad_h = grad_h_relu.clone()
        # ReLU passes no gradient where its input was negative
        grad_h[h < 0] = 0
        grad_w1 = x.t().mm(grad_h)

        # Update weights using gradient descent
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
Esempio n. 5
0
def test_krr_gaussian_local_cmat():
    """Validate the local Gaussian kernel and the KRR model built from it.

    Uses the first 1000 molecules (sorted by filename) of
    ``data/hof_qm7.txt`` with row-norm sorted atomic Coulomb matrices of
    size 23, shuffled with a fixed seed and split into 200 training and 100
    test molecules.  The training and prediction kernels are checked for
    symmetry, against stored reference files and against the
    ``get_atomic_kernels_gaussian`` wrapper.  Finally the mean absolute error
    of the regression must lie within 1.0 of 19.0.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Parse file containing PBE0/def2-TZVP heats of formation and xyz filenames
    data = get_energies(test_dir + "/data/hof_qm7.txt")

    # Generate a list of qml.data.Compound() objects
    mols = []

    for xyz_file in sorted(data.keys())[:1000]:

        # Initialize the qml.data.Compound() objects
        mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)

        # Associate a property (heat of formation) with the object
        mol.properties = data[xyz_file]

        # Atomic Coulomb matrices sorted by row norm
        mol.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")

        mols.append(mol)

    # Shuffle molecules
    np.random.seed(666)
    np.random.shuffle(mols)

    # Make training and test sets
    n_test = 100
    n_train = 200

    training = mols[:n_train]
    test = mols[-n_test:]

    # Per-molecule representations are stacked into one array; N and Ns hold
    # the atom count of each molecule and are passed alongside to the kernels
    X = np.concatenate([mol.representation for mol in training])
    Xs = np.concatenate([mol.representation for mol in test])

    N = np.array([mol.natoms for mol in training])
    Ns = np.array([mol.natoms for mol in test])

    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    # Set hyper-parameters
    sigma = 724.0
    llambda = 10**(-6.5)

    K = get_local_kernels_gaussian(X, X, N, N, [sigma])[0]
    assert np.allclose(K, K.T), "Error in local Gaussian kernel symmetry"

    K_test = np.loadtxt(test_dir + "/data/K_local_gaussian.txt")
    assert np.allclose(
        K, K_test), "Error in local Gaussian kernel (vs. reference)"

    K_test = get_atomic_kernels_gaussian(training, training, [sigma])[0]
    assert np.allclose(K,
                       K_test), "Error in local Gaussian kernel (vs. wrapper)"

    # Solve alpha
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Calculate prediction kernel
    Ks = get_local_kernels_gaussian(Xs, X, Ns, N, [sigma])[0]

    Ks_test = np.loadtxt(test_dir + "/data/Ks_local_gaussian.txt")
    # Sometimes a few Coulomb matrices differ because of parallel sorting and
    # numerical error, so fewer than 5 rows of the prediction kernel may
    # deviate from the supplied reference. The absolute difference is used so
    # that deviations of either sign are counted.
    differing_rows = np.unique(np.where(np.abs(Ks - Ks_test) > 1e-7)[0])
    assert differing_rows.size < 5, "Error in local Gaussian kernel (vs. reference)"

    Ks_test = get_atomic_kernels_gaussian(test, training, [sigma])[0]
    assert np.allclose(Ks,
                       Ks_test), "Error in local Gaussian kernel (vs. wrapper)"

    Yss = np.dot(Ks, alpha)

    mae = np.mean(np.abs(Ys - Yss))
    print(mae)
    assert abs(19.0 -
               mae) < 1.0, "Error in local Gaussian kernel-ridge regression"
Esempio n. 6
0
def test_krr_laplacian_local_cmat():
    """Validate KRR with the local Laplacian kernel on atomic Coulomb matrices.

    Uses the first 1000 molecules (sorted by filename) of
    ``data/hof_qm7.txt``, shuffled with a fixed seed and split into 200
    training and 100 test molecules.  The training kernel must be symmetric
    and the mean absolute error of the predictions must lie within 1.0 of
    8.7.

    Comparisons against the stored ``K_local_laplacian.txt`` and
    ``Ks_local_laplacian.txt`` reference files are intentionally not made:
    they fail occasionally because the row-norm sorting can differ when row
    norms are close.
    """
    base = os.path.dirname(os.path.realpath(__file__))

    # Maps each xyz filename to its PBE0/def2-TZVP heat of formation
    hof = get_energies(base + "/data/hof_qm7.txt")

    compounds = []
    for name in sorted(hof.keys())[:1000]:
        compound = qml.data.Compound(xyz=base + "/qm7/" + name)
        compound.properties = hof[name]
        # Atomic Coulomb matrices sorted by row norm
        compound.generate_atomic_coulomb_matrix(size=23, sorting="row-norm")
        compounds.append(compound)

    # Fixed seed so the train/test split is reproducible
    np.random.seed(666)
    np.random.shuffle(compounds)

    n_train, n_test = 200, 100
    train_set = compounds[:n_train]
    test_set = compounds[-n_test:]

    # Training data: stacked representations, atom counts and targets
    X = np.concatenate([entry.representation for entry in train_set])
    N = np.array([entry.natoms for entry in train_set])
    Y = np.array([entry.properties for entry in train_set])

    # Test data in the same layout
    Xs = np.concatenate([entry.representation for entry in test_set])
    Ns = np.array([entry.natoms for entry in test_set])
    Ys = np.array([entry.properties for entry in test_set])

    # Hyper-parameters: kernel width and regularisation strength
    sigma = 10**(3.6)
    llambda = 10**(-12.0)

    # Only one sigma is supplied, so take the first (and only) kernel
    K = get_local_kernels_laplacian(X, X, N, N, [sigma])[0]
    assert np.allclose(K, K.T), "Error in local Laplacian kernel symmetry"

    # Regularise the diagonal in place and solve for the regression weights
    K[np.diag_indices_from(K)] += llambda
    alpha = cho_solve(K, Y)

    # Prediction kernel between test and training molecules
    Ks = get_local_kernels_laplacian(Xs, X, Ns, N, [sigma])[0]
    Yss = np.dot(Ks, alpha)

    mae = np.abs(Ys - Yss).mean()
    assert abs(8.7 -
               mae) < 1.0, "Error in local Laplacian kernel-ridge regression"