예제 #1
0
# Report the time.clock() difference since `ttt` was recorded. `ttt` is not
# assigned in this excerpt; presumably it was set with time.clock() just
# before the dataset was loaded -- verify against the full script.
print "TIMER load_data", time.clock() - ttt


# Accumulators, each starting as its own empty list (teach_indices_rec, for
# instance, gets one entry appended per training-set iteration further down).
test_indices_rec, teach_indices_rec = [], []

alpha_predicted, alpha_target = [], []
energy_target, energy_error = [], []

# --------------------------------------------
# The Gaussian Process is created a single time, ahead of the loop, so that
# its parameters stay the same from one training set to the next
# --------------------------------------------
gp = GaussianProcess(corr='absolute_exponential',
                     theta0=sp.asarray([theta0]),
                     nugget=nugget,
                     verbose=True,
                     normalise=normalise,
                     do_features_projection=False,
                     low_memory=False,
                     metric=metric)

# --------------------------------------------
# Loop over different training sets of the same size
# (Ndatabases iterations; the loop body may continue past this excerpt)
# --------------------------------------------
for iteration in range(Ndatabases):
    # --------------------------------------------
    # Pick Ntest configurations randomly
    # --------------------------------------------
    # randint samples with replacement, so test_indices can contain repeated
    # entries. NOTE(review): confirm duplicates in the test set are acceptable.
    test_indices = list(sp.random.randint(0, high=dataset[target_property].size, size=Ntest))
    # Nteach further indices from the same range, with the test indices passed
    # as `exclude`. randint_norepeat is defined elsewhere; presumably it
    # returns unique indices that avoid `exclude` -- verify.
    db_indices = randint_norepeat(0, exclude=test_indices, high=dataset[target_property].size, size=Nteach)
    teach_indices_rec.append(db_indices)

    # Select the chosen entries, test indices first, then training indices.
    # Assumes db_indices is a plain list so that `+` concatenates -- TODO confirm.
    X = dataset['X'][test_indices + db_indices]
    T = dataset[target_property][test_indices + db_indices]
    # Print a 60-character dashed rule surrounded by blank lines
    print "\n", "-"*60, "\n"
예제 #2
0
# --------------------------------------------
# in this case, only sorted eigenvalues of Coulomb matrix
ttt = time.clock()
eigX = [(eigh(M, eigvals_only=True))[::-1] for M in X]
eigt = [(eigh(M, eigvals_only=True))[::-1] for M in Xtest]
print "TIMER eval_features", time.clock() - ttt

# Observations
y = T.ravel()

alpha = []
covmat = []
# Sweep theta0 over 10**e for 7 exponents e evenly spaced between -2 and 5
# (both ends included), i.e. theta0 runs from 1e-2 up to 1e5. A fresh
# GaussianProcess is built, fitted and evaluated for every value.
# The loop body may continue past the end of this excerpt.
for theta0 in [10.0**i for i in sp.linspace(-2,5,7)]: # sp.linspace(1,1, N_models):
    # Setup a Gaussian Process model (the timer also covers construction)
    ttt = time.clock()
    gp = GaussianProcess(corr='absolute_exponential', theta0=sp.asarray([theta0]),
                         nugget=1e-3, verbose=True, low_memory=False)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(eigX, y)
    print "TIMER teach", time.clock() - ttt

    # Restart the timer for the prediction step
    ttt = time.clock()
    # Disabled: evaluation on the training set, kept for reference
    # # Make the prediction on training set
    # y_pred, MSE = gp.predict(eigX, eval_MSE=True)
    # sigma = sp.sqrt(MSE)
    # print('\n training set:')
    # print('MAE:  %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0))
    # print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5)
    # Make the prediction on test set
    y_pred, MSE = gp.predict(eigt, eval_MSE=True)
    # sigma is the element-wise square root of the MSE returned by predict
    sigma = sp.sqrt(MSE)
예제 #3
0
eigX = [(eigh(M, eigvals_only=True))[::-1] for M in X]
eigt = [(eigh(M, eigvals_only=True))[::-1] for M in Xtest]
print "TIMER eval_features", time.clock() - ttt

# Observations
y = T.ravel()

alpha = []
covmat = []
# Sweep theta0 over 10**e for 7 exponents e evenly spaced between -2 and 5
# (both ends included), i.e. theta0 runs from 1e-2 up to 1e5. A fresh
# GaussianProcess is built and fitted for every value.
# The loop body continues past the end of this excerpt.
for theta0 in [10.0**i
               for i in sp.linspace(-2, 5, 7)]:  # sp.linspace(1,1, N_models):
    # Setup a Gaussian Process model (the timer also covers construction)
    ttt = time.clock()
    gp = GaussianProcess(corr='absolute_exponential',
                         theta0=sp.asarray([theta0]),
                         nugget=1e-3,
                         verbose=True,
                         low_memory=False)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(eigX, y)
    print "TIMER teach", time.clock() - ttt

    # Restart the timer for the prediction step
    ttt = time.clock()
    # Disabled: evaluation on the training set, kept for reference
    # # Make the prediction on training set
    # y_pred, MSE = gp.predict(eigX, eval_MSE=True)
    # sigma = sp.sqrt(MSE)
    # print('\n training set:')
    # print('MAE:  %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0))
    # print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5)
    # Make the prediction on test set
예제 #4
0
# --------------------------------------------
# Load all database
# --------------------------------------------
ttt = time.clock()
dataset = pickle.load(open(dataset_loc, 'r'))

print "TIMER load_data", time.clock() - ttt

# Accumulators; every name is bound to its own, initially empty, list
test_indices_rec = []
teach_indices_rec = []
alpha_predicted = []
alpha_target = []
energy_target = []
energy_error = []
 
# --------------------------------------------
# Setup two Gaussian Processes that share every setting except theta0:
# `gp` uses theta0, `gp_level2` uses theta0_level2
# --------------------------------------------
gp = GaussianProcess(corr='absolute_exponential',
                     theta0=sp.asarray([theta0]),
                     nugget=nugget,
                     verbose=True,
                     normalise=normalise,
                     do_features_projection=False,
                     low_memory=False,
                     metric=metric)
gp_level2 = GaussianProcess(corr='absolute_exponential',
                            theta0=sp.asarray([theta0_level2]),
                            nugget=nugget,
                            verbose=True,
                            normalise=normalise,
                            do_features_projection=False,
                            low_memory=False,
                            metric=metric)


# --------------------------------------------
# Loop over different training sets of the same size
# (Ndatabases iterations; the loop body continues past this excerpt)
# --------------------------------------------
for iteration in range(Ndatabases):
    # --------------------------------------------
    # Pick Ntest configurations randomly
    # --------------------------------------------
    # randint samples with replacement, so test_indices can contain repeated
    # entries. NOTE(review): confirm duplicates in the test set are acceptable.
    test_indices = list(sp.random.randint(0, high=dataset[target_property].size, size=Ntest))
    # Nteach further indices from the same range, with the test indices passed
    # as `exclude`. randint_norepeat is defined elsewhere; presumably it
    # returns unique indices that avoid `exclude` -- verify.
    db_indices = randint_norepeat(0, exclude=test_indices, high=dataset[target_property].size, size=Nteach)
    # Save this iteration's training indices; the file name combines the
    # iteration number with the current time.ctime() string.
    # NOTE(review): ctime() output contains spaces and colons, which some
    # filesystems (e.g. on Windows) do not accept -- confirm target platform.
    sp.save('db_indices_%d-%s' % (iteration, time.ctime()), db_indices)
    teach_indices_rec.append(db_indices)
예제 #5
0
    # Indented body whose enclosing statement lies before this excerpt
    # (presumably a loop over the target properties -- verify).
    # y[:, None] turns y into a single-column array, so pdist returns the
    # pairwise distances between its entries as a condensed 1-D vector.
    y = sp.spatial.distance.pdist(y[:,None])
    # Histogram of those distances over 53 bins; `bins` holds the bin edges.
    # NOTE(review): `normed` is deprecated in newer Matplotlib releases in
    # favour of `density`.
    frequency, bins, patches = plt.hist(y, bins=53, normed=True)
    # Turn bin edges into bin centres: rotate a copy of the edges left by one,
    # average it with the original, and drop the last entry (which would mix
    # the last edge with the first).
    bins_dummy = list(bins)
    bins_dummy.append(bins_dummy.pop(0))
    bins = ((bins + sp.asarray(bins_dummy)) / 2)[:-1]
    # Store a 2-row array: row 0 = bin centres, row 1 = frequencies
    histograms.append(sp.row_stack((bins, frequency)))

# --------------------------------------------
# Settings for the Gaussian Process constructed below
# --------------------------------------------
theta0, nugget = 10.0, 1e-15
normalise, metric = 1, 'cityblock'

# Gaussian Process built from the settings assigned above
gp = GaussianProcess(corr='absolute_exponential',
                     theta0=sp.asarray([theta0]),
                     nugget=nugget,
                     verbose=True,
                     normalise=normalise,
                     do_features_projection=False,
                     low_memory=False,
                     metric=metric)

# Features: eigenvalues of every matrix in dataset['X'], reversed so that the
# largest comes first (eigh returns them in ascending order)
eigX = [(eigh(M, eigvals_only=True))[::-1] for M in dataset['X']]
# calc_kernel_matrix is a project method; gp.D is read right after the call,
# so it presumably fills that attribute -- verify
gp.calc_kernel_matrix(eigX)
# squareform converts between the square and the condensed distance layout
d = sp.spatial.distance.squareform(gp.D)
# NOTE(review): `d` is not used by any line visible in this excerpt. The
# histogram below is taken over `y`, i.e. whatever the indented block above
# assigned last, yet the resulting series is later plotted with the label
# 'eigenvalues of Coulomb matrix'. `d` may have been intended here -- confirm.
# NOTE(review): `normed` is deprecated in newer Matplotlib releases in favour
# of `density`.
frequency, bins, patches = plt.hist(y, bins=47, normed=True)
# Bin edges -> bin centres: average the edges with a copy rotated left by
# one, then drop the last entry (it would mix the last edge with the first)
bins_dummy = list(bins)
bins_dummy.append(bins_dummy.pop(0))
bins = ((bins + sp.asarray(bins_dummy)) / 2)[:-1]
# Clear the current figure; `frequency` and `bins` are kept for plotting below
plt.clf()

# One solid line per stored histogram (row 0 on the x axis, row 1 on the
# y axis), labelled with the target property at the same position
for i, h in enumerate(histograms):
    centres, density = h[0], h[1]
    plt.plot(centres, density, '-', label=target_properties[i])

# The current `bins` / `frequency` pair is drawn as round markers
plt.plot(bins, frequency, 'o', label='eigenvalues of Coulomb matrix')

plt.xlabel("distance in normalised property space")
plt.ylabel("frequency")
예제 #6
0
    # Tail of an indented body whose start lies before this excerpt;
    # bins_dummy, bins and frequency are assigned there.
    # Move the first element of bins_dummy to the end (rotate left by one).
    bins_dummy.append(bins_dummy.pop(0))
    # Element-wise mean of `bins` and the rotated list, without the last
    # entry. If bins_dummy started as a copy of `bins` (presumably the case --
    # verify), this gives the midpoints of consecutive entries.
    bins = ((bins + sp.asarray(bins_dummy)) / 2)[:-1]
    # Store a 2-row array: row 0 = the averaged bins, row 1 = frequency
    histograms.append(sp.row_stack((bins, frequency)))

# --------------------------------------------
# Settings for the Gaussian Process constructed below
# --------------------------------------------
theta0, nugget = 10.0, 1e-15
normalise, metric = 1, 'cityblock'

# Gaussian Process built from the settings assigned above; all arguments are
# passed by keyword, grouped here by theme
gp = GaussianProcess(
    corr='absolute_exponential', metric=metric,
    theta0=sp.asarray([theta0]), nugget=nugget,
    normalise=normalise, do_features_projection=False,
    low_memory=False, verbose=True)

# Features: eigenvalues of every matrix in dataset['X'], reversed so that the
# largest comes first (eigh returns them in ascending order)
eigX = [(eigh(M, eigvals_only=True))[::-1] for M in dataset['X']]
# calc_kernel_matrix is a project method; gp.D is read right after the call,
# so it presumably fills that attribute -- verify
gp.calc_kernel_matrix(eigX)
# squareform converts between the square and the condensed distance layout
d = sp.spatial.distance.squareform(gp.D)
# NOTE(review): `d` is not used by any line visible in this excerpt; the
# histogram below is taken over `y`, which is assigned before this excerpt.
# `d` may have been intended here -- confirm.
# NOTE(review): `normed` is deprecated in newer Matplotlib releases in favour
# of `density`.
frequency, bins, patches = plt.hist(y, bins=47, normed=True)
# Bin edges -> bin centres: average the edges with a copy rotated left by
# one, then drop the last entry (it would mix the last edge with the first)
bins_dummy = list(bins)
bins_dummy.append(bins_dummy.pop(0))
bins = ((bins + sp.asarray(bins_dummy)) / 2)[:-1]
# Clear the current figure; `frequency` and `bins` stay available
plt.clf()

# One solid line per stored histogram (row 0 on x, row 1 on y), labelled with
# the target property at the same position. The script continues past this
# excerpt.
for i, h in enumerate(histograms):
    plt.plot(h[0], h[1], '-', label=target_properties[i])
예제 #7
0
ttt = time.clock()
dataset = pickle.load(open(dataset_loc, 'r'))

print "TIMER load_data", time.clock() - ttt

# Accumulators; every name is bound to its own, initially empty, list
test_indices_rec = []
teach_indices_rec = []
alpha_predicted = []
alpha_target = []
energy_target = []
energy_error = []

# --------------------------------------------
# Setup two Gaussian Processes: identical settings, different theta0
# (theta0 for `gp`, theta0_level2 for `gp_level2`)
# --------------------------------------------
# Keyword arguments common to both models
_gp_settings = dict(corr='absolute_exponential',
                    nugget=nugget,
                    verbose=True,
                    normalise=normalise,
                    do_features_projection=False,
                    low_memory=False,
                    metric=metric)
gp = GaussianProcess(theta0=sp.asarray([theta0]), **_gp_settings)
gp_level2 = GaussianProcess(theta0=sp.asarray([theta0_level2]),
                            **_gp_settings)

# --------------------------------------------
# Loop over different training sets of the same size
# --------------------------------------------