def test_graph_conv_regression_model(self):
    """Smoke-test a GraphConv regressor: brief fit, evaluate, then verify the
    model can be saved to disk and reloaded for evaluation."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')
    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=1)
    scores = model.evaluate(dataset, [metric], transformers)
    model.save()
    # Reload from disk and make sure evaluation still runs on the restored graph.
    restored = TensorGraph.load_from_dir(model.model_dir)
    scores = restored.evaluate(dataset, [metric], transformers)
def test_graph_conv_regression_model(self):
    """Train a GraphConv regressor to low error, then check that a
    save/reload round-trip reproduces the evaluation scores."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')
    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # After 100 epochs, every per-task MAE should be small.
    assert all(s < 0.1 for s in scores['mean_absolute_error'])
    model.save()
    reloaded = TensorGraph.load_from_dir(model.model_dir)
    scores2 = reloaded.evaluate(dataset, [metric], transformers)
    # The reloaded model must give the same scores as the one in memory.
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
def generate_graph_conv_model():
    """Featurize dude_ace.csv, train a single-task GraphConv classifier,
    print its Matthews-correlation score, and return the trained model.

    Returns:
        GraphConvModel: the model after fitting on the full dataset.
    """
    model = GraphConvModel(
        1, batch_size=128, mode="classification",
        model_dir="/tmp/mk01/model_dir")
    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(
        tasks=["is_active"], smiles_field="SMILES", featurizer=featurizer)
    dataset = loader.featurize("dude_ace.csv", shard_size=8192)
    metrics = [
        dc.metrics.Metric(
            dc.metrics.matthews_corrcoef, np.mean, mode="classification")
    ]
    # Removed dead locals from the original (splitter, transformers, and the
    # two empty score lists) — none of them were ever used.
    model.fit(dataset)
    # NOTE(review): evaluation is on the training set itself, so this reports
    # fit quality, not generalization.
    print(model.evaluate(dataset, metrics))
    return model
def test_graph_conv_model(self):
    """Train a GraphConv classifier, require ROC-AUC >= 0.9, then confirm a
    save/reload round-trip reproduces the score."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')
    model = GraphConvModel(len(tasks), batch_size=50, mode='classification')
    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
    model.save()
    reloaded = TensorGraph.load_from_dir(model.model_dir)
    scores2 = reloaded.evaluate(dataset, [metric], transformers)
    # Reloaded model must match the in-memory model's score.
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])
def test_graph_conv_model(self):
    """Classification GraphConv test: fit, score, and verify persistence."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')
    batch_size = 50
    model = GraphConvModel(
        len(tasks), batch_size=batch_size, mode='classification')
    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    # Ten epochs on this dataset should reach a strong ROC-AUC.
    assert scores['mean-roc_auc_score'] >= 0.9
    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])
def test_graph_conv_regression_model(self):
    """Regression GraphConv test with a tolerance-controlled reload check."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')
    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # Every per-task mean absolute error must be below 0.1 after training.
    assert all(s < 0.1 for s in scores['mean_absolute_error'])
    model.save()
    reloaded = TensorGraph.load_from_dir(model.model_dir)
    scores2 = reloaded.evaluate(dataset, [metric], transformers)
    # Allow a small relative tolerance between pre- and post-reload scores.
    assert np.allclose(
        scores['mean_absolute_error'],
        scores2['mean_absolute_error'],
        rtol=1e-4)
def test_graph_conv_model():
    """End-to-end GraphConv classification run on the covid Mpro dataset:
    featurize the CSV, split, train, box-plot validation predictions, and
    print Matthews-correlation and ROC-AUC scores on train/test splits."""
    model = GraphConvModel(
        1, batch_size=2000, mode="classification",
        model_dir="/tmp/covid/model_dir")
    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(
        tasks=["isHit"], smiles_field="SMILES", featurizer=featurizer)
    dataset = loader.featurize(
        "covid_mpro_combined_data_sources.csv", shard_size=8192)
    metrics = [
        dc.metrics.Metric(
            dc.metrics.matthews_corrcoef, np.mean, mode="classification")
    ]
    splitter = dc.splits.RandomSplitter()
    train_dataset, valid_dataset, test_dataset = (
        splitter.train_valid_test_split(dataset))
    model.fit(train_dataset)
    # Flatten each molecule's [neg, pos] class-probability pair for plotting.
    pred = [p.flatten() for p in model.predict(valid_dataset)]
    pred_df = pd.DataFrame(pred, columns=["neg", "pos"])
    pred_df["active"] = [int(label) for label in valid_dataset.y]
    pred_df["SMILES"] = valid_dataset.ids
    # Visualize positive-class probability separated by true label.
    sns.boxplot(pred_df.active, pred_df.pos)
    print(model.evaluate(train_dataset, metrics))
    print(model.evaluate(test_dataset, metrics))
    # Repeat the evaluation with ROC-AUC instead of Matthews correlation.
    metrics = [
        dc.metrics.Metric(
            dc.metrics.roc_auc_score, np.mean, mode="classification")
    ]
    print(model.evaluate(train_dataset, metrics))
    print(model.evaluate(test_dataset, metrics))
def test_graph_conv_regression_model(self):
    """Fit a GraphConv regressor for 100 epochs and require low MAE."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')
    n_tasks = len(tasks)
    model = GraphConvModel(n_tasks, batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # Each task's mean absolute error should drop below 0.1.
    assert all(s < 0.1 for s in scores['mean_absolute_error'])
def test_graph_conv_model(self):
    """Fit a GraphConv classifier for 10 epochs and require ROC-AUC >= 0.9."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')
    n_tasks = len(tasks)
    model = GraphConvModel(n_tasks, batch_size=50, mode='classification')
    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
def test_graph_conv_regression_model():
    """Fit a GraphConv regressor with batch normalization disabled and
    check that its mean absolute error is small."""
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       'GraphConv')
    model = GraphConvModel(
        len(tasks),
        batch_size=10,
        batch_normalize=False,
        mode='regression')
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.1
from deepchem.molnet import load_delaney

# Load the Delaney solubility dataset with graph-convolution featurization
# and an index-based split.
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Pearson R^2, averaged across tasks, is the evaluation metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Setup for the tf/keras model.
# n_feat is the number of per-atom features produced by ConvMol featurization.
n_feat = 75
batch_size = 128
model = GraphConvModel(
    len(delaney_tasks), batch_size=batch_size, mode='regression')

# Train, then report scores on the train and validation splits.
model.fit(train_dataset, nb_epoch=20)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Train scores")
print(train_scores)
print("Validation scores")
print(valid_scores)
start_time = time.time()

num_epochs = 100
losses_train = []
score_valid = []
score_train = []
for epoch in range(num_epochs):
    # One deterministic pass over the training data per outer iteration.
    loss_train = model.fit(train_dataset, nb_epoch=1, deterministic=True)
    ckpt.step.assign_add(1)
    save_path = manager.save()
    print("Saved checkpoint for step {}: {} ".format(int(ckpt.step), save_path))
    # NOTE(review): manager.save() returns a checkpoint file path; passing it
    # as model_dir looks suspicious — confirm save_checkpoint expects a path
    # rather than a directory here.
    model.save_checkpoint(max_checkpoints_to_keep=20, model_dir=save_path)
    R2_train = model.evaluate(train_dataset, [metric])['r2_score']
    R2_valid = model.evaluate(valid_dataset, [metric])['r2_score']
    print("Epoch %d loss_train: %f R2_train %f R2_valid: %f " %
          (epoch, loss_train, R2_train, R2_valid))
    losses_train.append(loss_train)
    score_valid.append(R2_valid)
    score_train.append(R2_train)

# Report wall-clock time for the full training loop.
print("--- %s seconds ---" % (time.time() - start_time))

# Persist the per-epoch training curve.
df = pd.DataFrame(
    list(zip(losses_train, score_train, score_valid)),
    columns=['train-loss', 'train-R2score', 'valid-R2score'])
df.to_csv("score-hpopt.csv")
# Featurize the `smiles` list (presumably defined earlier in this file —
# it is used here before the reassignment further down) with two
# fingerprint types and print the arrays.
mols = [Chem.MolFromSmiles(smile) for smile in smiles]

feat = dc.feat.CircularFingerprint(size=1024)
arr = feat.featurize(mols)
print(arr)

feat = dc.feat.RDKitDescriptors()
arr = feat.featurize(mols)
print(arr)

# Train a single-task GraphConv regressor on the Delaney solubility data.
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = datasets
model = GraphConvModel(n_tasks=1, mode='regression', dropout=0.2)
model.fit(train_dataset, nb_epoch=100)

# Report Pearson R^2 on train and test splits.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
print(model.evaluate(train_dataset, [metric], transformers))
print(model.evaluate(test_dataset, [metric], transformers))

# Predict solubility for a handful of new molecules.
smiles = ['COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C',
          'CCOC(=O)CC',
          'CSc1nc(NC(C)C)nc(NC(C)C)n1',
          'CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1',
          'Cc1cc2ccccc2cc1C']
from rdkit import Chem
mols = [Chem.MolFromSmiles(s) for s in smiles]
featurizer = dc.feat.ConvMolFeaturizer()
x = featurizer.featurize(mols)
predicted_solubility = model.predict_on_batch(x)