# Reconstructed formatting for a whitespace-collapsed chunk of a Tox21
# multitask-classifier training script. Code tokens are unchanged from the
# original; only line breaks and comments were added.
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
import os
import shutil
import numpy as np
import deepchem as dc
from tox21_datasets import load_tox21

# Only for debug! Fixed seed makes runs reproducible.
np.random.seed(123)

# Load Tox21 dataset.
# n_features: length of each input feature vector -- presumably 1024-bit
# circular fingerprints; TODO confirm against load_tox21's featurizer default.
n_features = 1024
tox21_tasks, tox21_datasets, transformers = load_tox21()
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Fit models: mean ROC-AUC across the Tox21 tasks is the evaluation metric.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification")
# NOTE(review): os and shutil are imported but unused in this visible chunk --
# they are presumably used later in the file; verify before removing.
model = dc.models.TensorflowMultiTaskClassifier(
    len(tox21_tasks), n_features, layer_sizes=[1000], dropouts=[.25],
    learning_rate=0.001, batch_size=50, verbosity="high")
# Set some global variables up top verbosity = "high" #Make directories to store the raw and featurized datasets. base_dir = "/tmp/tox21_tf" data_dir = os.path.join(base_dir, "dataset") model_dir = os.path.join(base_dir, "model") # This is for good debug (to make sure nasty state isn't being passed around) if os.path.exists(base_dir): shutil.rmtree(base_dir) os.makedirs(base_dir) # Load Tox21 dataset n_features = 1024 tox21_tasks, tox21_datasets, transformers = load_tox21(data_dir, reload=False) # Do train/valid split. train_dataset, valid_dataset = tox21_datasets # Fit models classification_metric = Metric(metrics.roc_auc_score, np.mean, verbosity=verbosity, mode="classification") tensorflow_model = TensorflowMultiTaskClassifier( len(tox21_tasks), n_features, model_dir, dropouts=[.25], learning_rate=0.0003, weight_init_stddevs=[1.], batch_size=32, verbosity=verbosity) model = TensorflowModel(tensorflow_model, model_dir) # Fit trained model
d[label] = to_one_hot(y_b[:, index]) d[task_weights] = w_b multiConvMol = ConvMol.agglomerate_mols(X_b) d[atom_features] = multiConvMol.get_atom_features() d[degree_slice] = multiConvMol.deg_slice d[membership] = multiConvMol.membership for i in range(1, len(multiConvMol.get_deg_adjacency_lists())): d[deg_adjs[i - 1]] = multiConvMol.get_deg_adjacency_lists()[i] yield d return model, feed_dict_generator, labels, task_weights # Load Tox21 dataset tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='GraphConv') train_dataset, valid_dataset, test_dataset = tox21_datasets print(train_dataset.data_dir) print(valid_dataset.data_dir) # Fit models metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification") # Batch size of models batch_size = 50 model, generator, labels, task_weights = graph_conv_model( batch_size, tox21_tasks)
Script that trains multitask models on Tox21 dataset.
"""
# NOTE(review): the opening triple-quote of the module docstring above lies
# before this chunk. Formatting below is reconstructed from a
# whitespace-collapsed original; code tokens are unchanged.
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
import numpy as np
import deepchem as dc
from tox21_datasets import load_tox21

# Only for debug! Fixed seed makes runs reproducible.
np.random.seed(123)

# Load Tox21 dataset.
# n_features: length of each input feature vector (1024 here).
n_features = 1024
tox21_tasks, tox21_datasets, transformers = load_tox21()
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Fit models: mean ROC-AUC across the Tox21 tasks.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)
model = dc.models.TensorflowMultiTaskClassifier(
    len(tox21_tasks), n_features, layer_sizes=[1000], dropouts=[.25],
    learning_rate=0.001, batch_size=50)

# Fit trained model, then persist it to disk.
model.fit(train_dataset)
model.save()

# Evaluate on the training split; transformers are passed so scores are
# computed in the untransformed label space.
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
"""
Script that trains graph-conv models on Tox21 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
import numpy as np
np.random.seed(123)
import tensorflow as tf
# TF1-era global-seed API; seeds are set immediately after import so graph
# construction below is deterministic.
tf.set_random_seed(123)
import deepchem as dc
from tox21_datasets import load_tox21

# Load Tox21 dataset, featurized for graph convolutions.
tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = tox21_datasets

# Fit models: mean ROC-AUC across the Tox21 tasks.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 50
# Build the graph-conv stack layer by layer. NOTE(review): this chunk ends
# mid-construction -- more layers presumably follow past the visible span.
graph_model = dc.nn.SequentialGraph(n_feat)
graph_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
graph_model.add(dc.nn.GraphConv(64, 64, activation='relu'))
# Reconstructed formatting for a whitespace-collapsed chunk of a Tox21
# training script using the Petroski-Such graph model.
import numpy as np
import json
np.random.seed(123)
import tensorflow as tf
# TF1-era global-seed API, set right after import for determinism.
tf.set_random_seed(123)
import deepchem as dc
from tox21_datasets import load_tox21
from deepchem.models.tensorgraph.models.graph_models import PetroskiSuchTensorGraph

# NOTE(review): json and model_dir are unused in this visible chunk --
# presumably used later in the file; verify before removing.
model_dir = "/tmp/graph_conv"

# Load Tox21 dataset, featurized as adjacency matrices.
tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='AdjMatrix')
train_dataset, valid_dataset, test_dataset = tox21_datasets
print(train_dataset.data_dir)
print(valid_dataset.data_dir)

# Fit models: mean ROC-AUC across the Tox21 tasks.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean, mode="classification")

# Batch size of models
batch_size = 128
model = PetroskiSuchTensorGraph(
    len(tox21_tasks), batch_size=batch_size, mode='classification')