Example #1
    def test_setup(self):
        nsamples = 200

        # create x/y with matching values
        x = np.arange(nsamples)
        y = np.arange(nsamples)

        # dataset should be split into training 100, validation 50, test 50
        candidate = Dataset(x,
                            y,
                            split_test=0.25,
                            split_validation=1.0 / 3.0,
                            normalize_input=False)

        # check if splitting was correct
        self.assertEqual(50, len(candidate.testing[0]))
        self.assertEqual(50, len(candidate.testing[1]))
        self.assertEqual(50, len(candidate.validation[0]))
        self.assertEqual(50, len(candidate.validation[1]))
        self.assertEqual(100, len(candidate.training[0]))
        self.assertEqual(100, len(candidate.training[1]))

        # check if x-y pairs are still correct
        np.testing.assert_array_equal(*candidate.testing)
        np.testing.assert_array_equal(*candidate.validation)
        np.testing.assert_array_equal(*candidate.training)
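
The asserted sizes follow from applying the two fractions in sequence. A minimal sketch of the arithmetic, assuming split_test is taken from the full set first and split_validation from the remainder (the variable names are illustrative, not the library's internals):

n_samples = 200
n_test = int(n_samples * 0.25)                          # 50
n_validation = int((n_samples - n_test) * (1.0 / 3.0))  # 50
n_training = n_samples - n_test - n_validation          # 100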
Example #2
def main():

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)


    # dim (the basis-set dimension) is defined at module level in the original script
    trainer = Trainer(
        EluTrNNN([dim**2, 200, 100, dim**2], log_histograms=True),
        cost_function=IdempotencyPenalty(coupling=1e-6),
        optimizer=tf.train.AdamOptimizer(learning_rate=5e-3)
    )

    trainer.setup()
    network_idem, sess_idem = trainer.train(
        dataset,
        convergence_threshold=1e-5,
        #summary_save_path="butadien/log/idem"
    )
    graph_idem = trainer.graph

    with trainer.graph.as_default():
        error = trainer.cost_function.idempotency_error(network_idem)
        error_val = sess_idem.run(error, {network_idem.input_tensor: dataset.testing[0]})

    msg.info("Achieved idempotency error: " + str(error_val), 2)
Example #3
def main():

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)

    save_path = "butadien/scripts/log/idem"

    # remove stale summary logs from earlier runs
    try:
        rmtree(save_path)
    except OSError:
        pass

    trainer = Trainer(
        SeluTrNNN(
            [dim**2, 700, 700, dim**2], 
            log_histograms=True
        ),
        #error_function=AbsoluteError(),
        #cost_function=RegularizedMSE(alpha=1e-7),
        cost_function=IdempotencyPenalty(
            dataset.inverse_input_transform,
            coupling=1e-5
        ),
        #optimizer=tf.train.AdamOptimizer(learning_rate=1e-3)
    )

    trainer.setup()
    network, sess = trainer.train(
        dataset,
        convergence_threshold=1e-6,
        summary_save_path=save_path,
        mini_batch_size=15
    )
    graph_idem = trainer.graph

    with trainer.graph.as_default():
        y = tf.placeholder(
                dtype="float32", 
                shape=[None, network.structure[-1]],
                name="y"
            )
        error_val = sess.run(
            AbsoluteError().function(network, y), 
            {
                network.input_tensor: dataset.testing[0],
                y: dataset.testing[1]
            }
        )
        
        error_idem = sess.run(
            trainer.cost_function.idempotency_error(network), 
            {network.input_tensor: dataset.testing[0]}
        )

    msg.info("Achieved absolute error:    {:0.3E}".format(error_val), 2)
    msg.info("Achieved idempotency error: {:0.3E}".format(error_idem), 2)
Example #4
def fetch_dataset():
    #--- the dataset ---
    S, P = np.load("butadien/data/dataset.npy")

    ind_cut = 150
    index = np.arange(200)
    np.random.shuffle(index)

    S_test = np.array(S)[index[ind_cut:]]
    P_test = np.array(P)[index[ind_cut:]]

    S_train = np.array(S)[index[:ind_cut]]
    P_train = np.array(P)[index[:ind_cut]]

    dataset = Dataset(np.array(S_train), np.array(P_train), split_test=0.0)

    dataset.testing = (Dataset.normalize(S_test, mean=dataset.x_mean, std=dataset.x_std)[0], P_test)
    #---
    return dataset
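
Note the final assignment: the manually held-out test inputs are normalized with the mean and standard deviation of the training split (dataset.x_mean, dataset.x_std), so no test-set statistics leak into the preprocessing. The trailing [0] suggests Dataset.normalize returns the transformed array together with the parameters it used; a sketch of such a transform follows Example #10 below.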
Example #5
    def _test_train_network_for_1d_function(self, function):

        x_val = np.random.rand(200, 1) * 2
        y_val = function(x_val)

        dataset = Dataset(x_val, y_val)

        with tf.Session() as sess:
            # TODO: rename!
            x = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="x")
            y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")

            network = EluTrNNN([1, 5, 3, 1])
            network.setup(input_tensor=x)

            optimizer = tf.train.AdamOptimizer()
            cost = tf.losses.mean_squared_error(y, network.output_tensor)
            training = optimizer.minimize(cost)

            sess.run(tf.global_variables_initializer())

            old_error = 1e16
            n = 0
            n_max = 1e4
            converged = False
            while not converged and n < n_max:

                for i in range(200):
                    sess.run(training, {
                        x: dataset.training[0],
                        y: dataset.training[1]
                    })

                error = sess.run(cost, {
                    x: dataset.validation[0],
                    y: dataset.validation[1]
                })

                if np.abs(old_error - error) < 1e-8:
                    converged = True
                else:
                    old_error = error
                    n += 1

            if not converged:
                self.fail("Training unsuccessfull, max iteration exceeded")

            test_error = sess.run(cost, {
                x: dataset.testing[0],
                y: dataset.testing[1]
            })
            np.testing.assert_almost_equal(test_error, 0.0, decimal=4)
Example #6
    def setUp(self):

        msg.print_level = 0

        self.structure = [1, 4, 1]
        self.nsamples = 100

        x = np.random.rand(self.nsamples, self.structure[0]) * 10
        y = np.sum(x**2, axis=1)

        self.dataset = Dataset(x.reshape(self.nsamples, self.structure[0]),
                               y.reshape(self.nsamples, self.structure[-1]))
Example #7
    def setUp(self):

        msg.print_level = 0

        self.input_dim = 5
        self.output_dim = 5
        nsamples = 100

        x = np.linspace(-2, 2, nsamples * self.input_dim)
        y = np.sin(x)

        self.dataset = Dataset(x.reshape(nsamples, self.input_dim),
                               y.reshape(nsamples, self.output_dim))
Example #8
def main(species="H"):

    #--- assemble the dataset ---
    root_directory = normpath(join(dirname(realpath(__file__)), "../"))
    dataset_source_folder = join(root_directory, "dataset/")
    sources = [
        join(dataset_source_folder, directory) \
            for directory in ["GMTKN55"]
    ]

    dataset = Dataset(*assemble_batch(sources, species))
    #---

    #--- setup and train the network ---
    dim = N_BASIS[species]

    structure = [dim, 25, dim]

    network = EluTrNNN(structure)

    network, sess = train_network(network, dataset)
    #---

    save_path = join(root_directory, "tmp" + species + ".npy")
    #try:
    #--- save trained model ---
    save_object = [
        network.structure,
        network.weights_values(sess),
        network.biases_values(sess)
    ]

    np.save(save_path, save_object)
    sess.close()
    msg.info("Session closed", 1)
    #---

    #--- load and reinitialize model ---
    msg.info("Starting new session and loading the model ...", 1)
    sess = tf.Session()
    model = np.load(save_path)

    new_network = EluFixedValue(*model)
    new_network.setup()
    sess.run(tf.global_variables_initializer())

    #finally:
    if isfile(save_path):
        remove(save_path)
Example #9
def fetch_dataset(path, dim):
    #--- the dataset ---
    S, P = np.load(path)

    ind_cut = 150
    index = np.arange(200)
    np.random.shuffle(index)

    S_triu = list(map(lambda x: extract_triu(x, dim), S))
    P_triu = list(map(lambda x: extract_triu(x, dim), P))

    S_test = np.array(S_triu)[index[ind_cut:]]
    P_test = np.array(P_triu)[index[ind_cut:]]

    S_train = np.array(S_triu)[index[:ind_cut]]
    P_train = np.array(P_triu)[index[:ind_cut]]

    dataset = Dataset(np.array(S_train), np.array(P_train), split_test=0.0)

    dataset.testing = (Dataset.normalize(S_test,
                                         mean=dataset.x_mean,
                                         std=dataset.x_std)[0], P_test)

    return dataset
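
extract_triu is not shown in this snippet. Since the overlap matrix S and the density matrix P are symmetric, their upper triangles carry all of the information; a minimal sketch of what extract_triu plausibly does under that assumption (an illustration, not the confirmed implementation):

import numpy as np

def extract_triu(matrix, dim):
    # flatten the upper-triangular part of a dim x dim symmetric matrix
    return np.asarray(matrix).reshape(dim, dim)[np.triu_indices(dim)]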
Example #10
    def test_normalisation(self):

        dim = 5
        mu = 3
        sigma = 2
        nsamples = 1000

        x = np.random.randn(nsamples, dim) * sigma + mu

        #--- check normalisation with calculated params ---
        x_norm = Dataset.normalize(x)[0]

        self.assertAlmostEqual(0, np.mean(x_norm), delta=self.tolerance)
        self.assertAlmostEqual(1, np.std(x_norm), delta=self.tolerance)
        #---

        #--- check normalisation with given params ---
        x_norm_given_params = Dataset.normalize(x, mean=mu, std=sigma)[0]
        self.assertAlmostEqual(0,
                               np.mean(x_norm_given_params),
                               delta=self.tolerance)
        self.assertAlmostEqual(1,
                               np.std(x_norm_given_params),
                               delta=self.tolerance)
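
A minimal sketch of the transform this test exercises, assuming the usual standard score and a (normalized array, mean, std) return value, which the [0] indexing above suggests (an illustration, not the library's confirmed code):

import numpy as np

def normalize(x, mean=None, std=None):
    # fall back to the sample statistics when no parameters are given
    if mean is None:
        mean = np.mean(x, axis=0)
    if std is None:
        std = np.std(x, axis=0)
    return (x - mean) / std, mean, std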
Example #11
def main(species,
         structure,
         save_path=None,
         source=None,
         convergence_threshold=1e-7,
         learning_rate=0.0005,
         regularisation_parameter=0.01,
         mini_batch_size=0.2):

    if structure[0] != N_BASIS[species] or structure[-1] != N_BASIS[species]:
        raise ValueError(
            "Invalid structure. Bad Input/Output dim (should be " + \
            "{0} but was {1}/{2}!".format(
                N_BASIS[species], structure[0], structure[-1]
            )
        )

    # whole numbers are absolute batch sizes; cast them to int (fractions are relative to the training set)
    if int(mini_batch_size) == mini_batch_size:
        mini_batch_size = int(mini_batch_size)

    if source is None:
        source = ["../dataset/PyQChem/s22"]

    msg.info("Assembling dataset ...", 2)
    dataset = Dataset(*assemble_batch(source, species))

    msg.info("Training model ...", 2)
    network = EluTrNNN(structure)
    network, sess = train_network(
        network,
        dataset,
        convergence_threshold=convergence_threshold,
        learning_rate=learning_rate,
        regularisation_parameter=regularisation_parameter,
        mini_batch_size=mini_batch_size)

    if save_path is not None:
        msg.info("Storing model ...", 2)
        save_object = [
            network.structure,
            network.weights_values(sess),
            network.biases_values(sess)
        ]

        np.save(save_path, save_object)
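
Here mini_batch_size=0.2 denotes a fraction of the training set, while whole numbers (after the cast above) are absolute batch sizes. A hedged sketch of how train_network might resolve the fractional case (an assumption; the library code is not shown here):

def resolve_batch_size(mini_batch_size, n_training_samples):
    # fractions are interpreted relative to the training-set size
    if isinstance(mini_batch_size, float):
        return max(1, int(mini_batch_size * n_training_samples))
    return mini_batch_size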
Example #12
def main():

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)

    msg.info("Starting grid search ", 2)
    with open(log_file, "w") as f:
        info  = "===============================\n"
        info += str(datetime.now()) + "\n\n"
        f.write(info)

    structures = sample_structures()
    for structure in uniquifiy(structures):
        try:
            investigate_structure(dataset, structure)
        except Exception as ex:
            msg.error("Something went wrong during investigation: " + str(ex))
Example #13
from pyscf.scf import hf

from SCFInitialGuess.utilities.usermessages import Messenger as msg
from SCFInitialGuess.utilities.dataset import Dataset, Molecule
from SCFInitialGuess.nn.networks import EluTrNNN
from SCFInitialGuess.nn.training import train_network

dim = 26
model_save_path = "butadien/model.npy"
source = "butadien/data"

msg.info("Welcome", 2)

#--- train network ---
msg.info("Training the network", 2)
# load_data is assumed to be defined or imported elsewhere in the original script
dataset = Dataset(*load_data(source))

structure = [dim**2, 200, 100, dim**2]

network, sess = train_network(EluTrNNN(structure),
                              dataset,
                              evaluation_period=100,
                              mini_batch_size=20,
                              convergence_threshold=1e-6)

msg.info("Exporting model", 2)
network.export(sess, model_save_path)


#---
def not_used():
    pass