def generate_dataset(x1, x2, y):
    """Build a noisy three-column dataset from QM9 test-split labels.

    Loads QM9 through ``load_qm9()``, selects three task columns from the
    test split's label matrix, perturbs a random-length prefix of the ``y``
    column with zero-mean Gaussian noise, writes the result to
    ``y_gen_data.csv`` in the current working directory, and reads it back.

    The noise added to sample ``i`` has standard deviation
    ``|x1_i + x2_i| / 2``. The number of perturbed samples is drawn
    uniformly from ``[0, n_samples)`` and truncated to an integer; the
    perturbed samples are always the first ones in the column.

    Args:
        x1: 1-based index of the task used as the first feature column.
        x2: 1-based index of the task used as the second feature column.
        y: 1-based index of the task used as the (noised) target column.

    Returns:
        The ``pandas.DataFrame`` read back from ``y_gen_data.csv`` with
        columns ``x1``, ``x2`` and ``y``.
    """
    # Load data; only the task names and the test split are used here.
    qm9_tasks, datasets, _ = load_qm9()
    _, _, test_dataset = datasets

    print("x1 = ", qm9_tasks[x1 - 1])
    print("x2 = ", qm9_tasks[x2 - 1])
    print("y = ", qm9_tasks[y - 1])

    # Transposing lets each task be picked out by its row index.
    # NOTE(review): assumes test_dataset.y is laid out (samples, tasks) --
    # confirm against the loader.
    task_columns = test_dataset.y.T
    x1_col = task_columns[x1 - 1]
    x2_col = task_columns[x2 - 1]
    # astype() returns a copy, so the dataset's own labels are not modified.
    noisy_y = task_columns[y - 1].astype(float)

    # Number of leading samples that receive noise. The builtin int replaces
    # np.int, which no longer exists in NumPy >= 1.24.
    n_samples = noisy_y.shape[0]
    n_noisy = int(np.random.uniform(0, n_samples))

    # One vectorised draw with a per-sample scale replaces drawing n values
    # per iteration and keeping a single one. abs() is required because
    # np.random.normal rejects a negative scale.
    sigma = np.abs(
        (x1_col[:n_noisy].astype(float) + x2_col[:n_noisy].astype(float)) / 2
    )
    noisy_y[:n_noisy] += np.random.normal(0, sigma)

    # Save to CSV, then read it back so the caller gets what is on disk.
    dataframe = pd.DataFrame({'x1': x1_col, 'x2': x2_col, 'y': noisy_y})
    dataframe.to_csv("y_gen_data.csv", index=False, sep=',')
    gen_data = pd.read_csv('y_gen_data.csv')
    return gen_data
""" Script that trains Tensorflow multitask models on QM9 dataset. """ from __future__ import print_function from __future__ import division from __future__ import unicode_literals import os import deepchem as dc import numpy as np from deepchem.molnet import load_qm9 np.random.seed(123) qm9_tasks, datasets, transformers = load_qm9() train_dataset, valid_dataset, test_dataset = datasets fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)] regression_metric = [ dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"), dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression") ] model = dc.models.MultiTaskFitTransformRegressor( n_tasks=len(qm9_tasks), n_features=[29, 29], learning_rate=0.001, momentum=.8, batch_size=32, weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)], bias_init_consts=[0., 0., 0.], layer_sizes=[400, 100, 100], dropouts=[0.01, 0.01, 0.01], fit_transformers=fit_transformers,