def test_setup(self):
    """Check that Dataset splits 200 samples 100/50/50 and keeps x-y pairs.

    x and y are built from the same ``np.arange`` values, so a subset whose
    input and target arrays differ means the split mixed up the pairs.
    """
    nsamples = 200

    # identical values in x and y make any pair mismatch detectable
    x = np.arange(nsamples)
    y = np.arange(nsamples)

    # the dataset should be split into 100 pure training, 50 validation
    # and 50 test samples
    candidate = Dataset(
        x,
        y,
        split_test=0.25,
        split_validation=1.0 / 3.0,
        normalize_input=False
    )

    subset_names = ("testing", "validation", "training")
    expected_sizes = {"testing": 50, "validation": 50, "training": 100}

    # check that the split produced the expected subset sizes
    for subset_name in subset_names:
        subset = getattr(candidate, subset_name)
        self.assertEqual(expected_sizes[subset_name], len(subset[0]))
        self.assertEqual(expected_sizes[subset_name], len(subset[1]))

    # check that the x-y pairs are still matched within every subset
    for subset_name in subset_names:
        np.testing.assert_array_equal(*getattr(candidate, subset_name))
def main():
    """Train an EluTrNNN with an idempotency penalty and report the test error.

    Loads S and P from ``butadien/data/dataset.npy``, holds back 25% of the
    samples for testing, trains the network and logs the idempotency error
    evaluated on the test inputs. ``dim`` is read from module scope.
    """
    S, P = np.load("butadien/data/dataset.npy")
    dataset = Dataset(S, P, split_test=0.25)

    # build the trainer ingredients one by one (same order as they were
    # evaluated when passed inline)
    layer_sizes = [dim**2, 200, 100, dim**2]
    model = EluTrNNN(layer_sizes, log_histograms=True)
    penalty = IdempotencyPenalty(coupling=1e-6)
    adam = tf.train.AdamOptimizer(learning_rate=5e-3)

    trainer = Trainer(model, cost_function=penalty, optimizer=adam)
    trainer.setup()

    network_idem, sess_idem = trainer.train(
        dataset,
        convergence_threshold=1e-5,
        #summary_save_path="butadien/log/idem"
    )

    # NOTE: this binding is not used further inside this function
    graph_idem = trainer.graph

    # the error tensor has to be created in the graph the network lives in
    with trainer.graph.as_default():
        error = trainer.cost_function.idempotency_error(network_idem)
        test_feed = {network_idem.input_tensor: dataset.testing[0]}
        error_val = sess_idem.run(error, test_feed)

    msg.info("Achieved idempotency error: " + str(error_val), 2)
def main():
    """Train a SeluTrNNN with an idempotency penalty and report test errors.

    Loads S and P from ``butadien/data/dataset.npy``, holds back 25% of the
    samples for testing, removes the summary directory of a previous run,
    trains the network and logs both the absolute error and the idempotency
    error evaluated on the test set. ``dim`` is read from module scope.
    """
    S, P = np.load("butadien/data/dataset.npy")
    dataset = Dataset(S, P, split_test=0.25)

    save_path = "butadien/scripts/log/idem"

    # Best-effort removal of old summaries. Only file-system errors (e.g. the
    # directory does not exist yet) are ignored; interrupts and programming
    # errors are no longer swallowed.
    try:
        rmtree(save_path)
    except OSError:
        pass

    trainer = Trainer(
        SeluTrNNN(
            [dim**2, 700, 700, dim**2],
            log_histograms=True
        ),
        #error_function=AbsoluteError(),
        #cost_function=RegularizedMSE(alpha=1e-7),
        cost_function=IdempotencyPenalty(
            dataset.inverse_input_transform,
            coupling=1e-5
        ),
        #optimizer=tf.train.AdamOptimizer(learning_rate=1e-3)
    )

    trainer.setup()
    network, sess = trainer.train(
        dataset,
        convergence_threshold=1e-6,
        summary_save_path=save_path,
        mini_batch_size=15
    )

    # the evaluation tensors have to be created in the trainer's graph
    with trainer.graph.as_default():
        # placeholder for the expected network output
        y = tf.placeholder(
            dtype="float32",
            shape=[None, network.structure[-1]],
            name="y"
        )

        error_val = sess.run(
            AbsoluteError().function(network, y),
            {
                network.input_tensor: dataset.testing[0],
                y: dataset.testing[1]
            }
        )

        error_idem = sess.run(
            trainer.cost_function.idempotency_error(network),
            {network.input_tensor: dataset.testing[0]}
        )

    msg.info("Achieved absolute error: {:0.3E}".format(error_val), 2)
    msg.info("Achieved idempotency error: {:0.3E}".format(error_idem), 2)
def fetch_dataset():
    """Load the butadien data and return a Dataset with a random train/test split.

    The indices 0..199 are shuffled; the first 150 select the training
    samples and the remaining 50 the test samples. The test inputs are
    normalized with ``x_mean`` / ``x_std`` of the dataset built from the
    training samples.

    Returns:
        The Dataset built from the training samples, with its ``testing``
        attribute replaced by the held-back samples.
    """
    #--- the dataset ---
    S, P = np.load("butadien/data/dataset.npy")

    ind_cut = 150
    index = np.arange(200)
    np.random.shuffle(index)
    train_index = index[:ind_cut]
    test_index = index[ind_cut:]

    S_all = np.array(S)
    P_all = np.array(P)

    S_test = S_all[test_index]
    P_test = P_all[test_index]

    S_train = S_all[train_index]
    P_train = P_all[train_index]

    # no internal test split: the test set is assigned by hand below
    dataset = Dataset(np.array(S_train), np.array(P_train), split_test=0.0)

    S_test_normalized = Dataset.normalize(
        S_test,
        mean=dataset.x_mean,
        std=dataset.x_std
    )[0]
    dataset.testing = (S_test_normalized, P_test)
    #---

    return dataset
def _test_train_network_for_1d_function(self, function):
    """Train a small EluTrNNN on a 1d function and require a near-zero test cost.

    200 random inputs in [0, 2) are mapped through ``function``. Training
    runs in rounds of 200 optimizer steps until the validation cost changes
    by less than 1e-8 between two rounds; the test fails if this does not
    happen within the iteration limit.

    Args:
        function: callable applied to the (200, 1) input array to create
            the target values.
    """
    x_val = np.random.rand(200, 1) * 2
    y_val = function(x_val)

    dataset = Dataset(x_val, y_val)

    with tf.Session() as sess:

        # TODO: rename!
        x = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="x")
        y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")

        network = EluTrNNN([1, 5, 3, 1])
        network.setup(input_tensor=x)

        optimizer = tf.train.AdamOptimizer()
        cost = tf.losses.mean_squared_error(y, network.output_tensor)
        training = optimizer.minimize(cost)

        sess.run(tf.global_variables_initializer())

        previous_error = 1e16
        iteration = 0
        max_iterations = 1e4
        converged = False

        while iteration < max_iterations and not converged:

            # one round of optimizer steps on the training data
            for _ in range(200):
                sess.run(
                    training,
                    {x: dataset.training[0], y: dataset.training[1]}
                )

            validation_error = sess.run(
                cost,
                {x: dataset.validation[0], y: dataset.validation[1]}
            )

            # converged once the validation cost stops changing
            converged = bool(
                np.abs(previous_error - validation_error) < 1e-8
            )
            if not converged:
                previous_error = validation_error

            iteration += 1

        if not converged:
            self.fail("Training unsuccessfull, max iteration exceeded")

        test_cost = sess.run(
            cost,
            {x: dataset.testing[0], y: dataset.testing[1]}
        )
        np.testing.assert_almost_equal(test_cost, 0.0, decimal=4)
def setUp(self):
    """Silence messages and create a random dataset for a [1, 4, 1] structure.

    The inputs are uniform random numbers scaled by 10; each target is the
    sum of the squared input components of its sample.
    """
    msg.print_level = 0

    self.structure = [1, 4, 1]
    self.nsamples = 100

    input_dim = self.structure[0]
    output_dim = self.structure[-1]

    x = np.random.rand(self.nsamples, input_dim) * 10
    y = np.sum(x**2, axis=1)

    # bring inputs and targets into (samples, dimension) form
    inputs = x.reshape(self.nsamples, input_dim)
    targets = y.reshape(self.nsamples, output_dim)

    self.dataset = Dataset(inputs, targets)
def setUp(self):
    """Silence messages and create a sine dataset with 5 inputs and 5 outputs.

    500 equally spaced points in [-2, 2] and their sine values are reshaped
    into 100 samples of dimension 5 each.
    """
    msg.print_level = 0

    self.input_dim = 5
    self.output_dim = 5

    nsamples = 100
    x = np.linspace(-2, 2, nsamples * self.input_dim)
    y = np.sin(x)

    inputs = x.reshape(nsamples, self.input_dim)
    targets = y.reshape(nsamples, self.output_dim)

    self.dataset = Dataset(inputs, targets)
def main(species="H"):
    """Train a network for one species, save it and rebuild it from the file.

    The model (structure, weights, biases) is written to a temporary
    ``tmp<species>.npy`` file in the root directory, loaded again and used
    to set up an EluFixedValue network in a fresh session. The temporary
    file is removed and both sessions are closed even if a step fails.

    Args:
        species: key into N_BASIS; also passed on to assemble_batch and
            used in the name of the temporary file.
    """
    #--- assemble the dataset ---
    root_directory = normpath(join(dirname(realpath(__file__)), "../"))
    dataset_source_folder = join(root_directory, "dataset/")
    sources = [
        join(dataset_source_folder, directory)
        for directory in ["GMTKN55"]
    ]

    dataset = Dataset(*assemble_batch(sources, species))
    #---

    #--- setup and train the network ---
    dim = N_BASIS[species]

    structure = [dim, 25, dim]

    network = EluTrNNN(structure)
    network, sess = train_network(network, dataset)
    #---

    save_path = join(root_directory, "tmp" + species + ".npy")

    try:
        #--- save trained model ---
        try:
            save_object = [
                network.structure,
                network.weights_values(sess),
                network.biases_values(sess)
            ]
            np.save(save_path, save_object)
        finally:
            # release the training session even if saving failed
            sess.close()
        msg.info("Session closed", 1)
        #---

        #--- load and reinitialize model ---
        msg.info("Starting new session and loading the model ...", 1)
        sess = tf.Session()
        try:
            model = np.load(save_path)

            new_network = EluFixedValue(*model)
            new_network.setup()
            sess.run(tf.global_variables_initializer())
        finally:
            # the second session was previously never closed
            sess.close()
        #---
    finally:
        # always clean up the temporary model file
        if isfile(save_path):
            remove(save_path)
def fetch_dataset(path, dim):
    """Load S and P from ``path`` and return a Dataset with a random train/test split.

    Every sample of S and P is passed through ``extract_triu(sample, dim)``.
    The indices 0..199 are shuffled; the first ``ind_cut`` (150) select the
    training samples and the rest the test samples. The test inputs are
    normalized with ``x_mean`` / ``x_std`` of the dataset built from the
    training samples.

    Args:
        path: file handed to ``np.load``; must unpack into S and P.
        dim: second argument forwarded to ``extract_triu``.

    Returns:
        The Dataset built from the training samples, with its ``testing``
        attribute replaced by the held-back samples.
    """
    #--- the dataset ---
    S, P = np.load(path)

    ind_cut = 150
    index = np.arange(200)
    np.random.shuffle(index)

    S_triu = np.array([extract_triu(sample, dim) for sample in S])
    P_triu = np.array([extract_triu(sample, dim) for sample in P])

    # split at ind_cut instead of repeating the literal 150
    train_index = index[:ind_cut]
    test_index = index[ind_cut:]

    S_test = S_triu[test_index]
    P_test = P_triu[test_index]

    S_train = S_triu[train_index]
    P_train = P_triu[train_index]

    # no internal test split: the test set is assigned by hand below
    dataset = Dataset(S_train, P_train, split_test=0.0)

    S_test_normalized = Dataset.normalize(
        S_test,
        mean=dataset.x_mean,
        std=dataset.x_std
    )[0]
    dataset.testing = (S_test_normalized, P_test)
    #---

    return dataset
def test_normalisation(self):
    """Check that Dataset.normalize yields data with mean 0 and std 1.

    Normally distributed samples with mean 3 and standard deviation 2 are
    normalized once with parameters calculated by ``normalize`` itself and
    once with the known parameters; both results must match mean 0 and
    std 1 within ``self.tolerance``.
    """
    dim = 5
    mu = 3
    sigma = 2
    nsamples = 1000

    x = np.random.randn(nsamples, dim) * sigma + mu

    def assert_standardised(values):
        # overall mean must be 0 and overall std must be 1
        self.assertAlmostEqual(0, np.mean(values), delta=self.tolerance)
        self.assertAlmostEqual(1, np.std(values), delta=self.tolerance)

    #--- check normalisation with calculated params ---
    assert_standardised(Dataset.normalize(x)[0])
    #---

    #--- check normalisation with given params ---
    assert_standardised(Dataset.normalize(x, mean=mu, std=sigma)[0])
    #---
def main(species,
         structure,
         save_path=None,
         source=None,
         convergence_threshold=1e-7,
         learning_rate=0.0005,
         regularisation_parameter=0.01,
         mini_batch_size=0.2):
    """Assemble a dataset for ``species``, train an EluTrNNN and optionally save it.

    Args:
        species: key into N_BASIS; also passed on to assemble_batch.
        structure: layer sizes; first and last entry must equal
            ``N_BASIS[species]``.
        save_path: if not None, structure, weights and biases of the
            trained network are stored there with ``np.save``.
        source: list of dataset sources; defaults to
            ``["../dataset/PyQChem/s22"]``.
        convergence_threshold: forwarded to train_network.
        learning_rate: forwarded to train_network.
        regularisation_parameter: forwarded to train_network.
        mini_batch_size: forwarded to train_network; integral values are
            converted to ``int`` first, other values are passed unchanged
            (presumably read as a fraction by train_network - to be
            confirmed there).

    Raises:
        ValueError: if the input or output dimension of ``structure`` does
            not match ``N_BASIS[species]``.
    """
    input_dim, output_dim = structure[0], structure[-1]
    expected_dim = N_BASIS[species]

    if input_dim != expected_dim or output_dim != expected_dim:
        message = (
            "Invalid structure. Bad Input/Output dim (should be "
            "{0} but was {1}/{2}!"
        ).format(expected_dim, input_dim, output_dim)
        raise ValueError(message)

    # a mini batch size with integral value (e.g. 20.0) becomes a real int
    batch_size_as_int = int(mini_batch_size)
    if batch_size_as_int == mini_batch_size:
        mini_batch_size = batch_size_as_int

    if source is None:
        source = ["../dataset/PyQChem/s22"]

    msg.info("Assembling dataset ...", 2)
    batch = assemble_batch(source, species)
    dataset = Dataset(*batch)

    msg.info("Training model ...", 2)
    network = EluTrNNN(structure)
    network, sess = train_network(
        network,
        dataset,
        convergence_threshold=convergence_threshold,
        learning_rate=learning_rate,
        regularisation_parameter=regularisation_parameter,
        mini_batch_size=mini_batch_size
    )

    # nothing to store without a target path
    if save_path is None:
        return

    msg.info("Storing model ...", 2)
    save_object = [
        network.structure,
        network.weights_values(sess),
        network.biases_values(sess)
    ]
    np.save(save_path, save_object)
def main():
    """Run a grid search over network structures on the butadien dataset.

    Loads S and P from ``butadien/data/dataset.npy``, holds back 25% of the
    samples for testing, starts a fresh log file with a time-stamped header
    and investigates every unique sampled structure. A failure for one
    structure is reported via ``msg.error`` and does not stop the search.
    """
    S, P = np.load("butadien/data/dataset.npy")
    dataset = Dataset(S, P, split_test=0.25)

    msg.info("Starting grid search ", 2)

    # start a new log file (mode "w" discards the content of previous runs)
    with open(log_file, "w") as f:
        header = "".join([
            "===============================\n",
            str(datetime.now()),
            "\n\n"
        ])
        f.write(header)

    candidates = uniquifiy(sample_structures())

    for structure in candidates:
        try:
            investigate_structure(dataset, structure)
        except Exception as ex:
            # keep going with the remaining structures
            msg.error("Something went wrong during investigation: " + str(ex))
from pyscf.scf import hf from SCFInitialGuess.utilities.usermessages import Messenger as msg from SCFInitialGuess.utilities.dataset import Dataset, Molecule from SCFInitialGuess.nn.networks import EluTrNNN from SCFInitialGuess.nn.training import train_network dim = 26 model_save_path = "butadien/model.npy" source = "butadien/data" msg.info("Welcome", 2) #--- train network --- msg.info("Training the network", 2) dataset = Dataset(*load_data(source)) structure = [dim**2, 200, 100, dim**2] network, sess = train_network(EluTrNNN(structure), dataset, evaluation_period=100, mini_batch_size=20, convergence_threshold=1e-6) msg.info("Exporting model", 2) network.export(sess, model_save_path) #--- def not_used():