def load_dataset(args):
    splitter = 'scaffold'
    if args['featurizer'] == 'ECFP':
        featurizer = 'ECFP'
    elif args['featurizer'] == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    else:
        raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

    if args['dataset'] == 'BACE_classification':
        from deepchem.molnet import load_bace_classification
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BBBP':
        from deepchem.molnet import load_bbbp
        tasks, all_dataset, transformers = load_bbbp(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BACE_regression':
        from deepchem.molnet import load_bace_regression
        tasks, all_dataset, transformers = load_bace_regression(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'ClinTox':
        from deepchem.molnet import load_clintox
        tasks, all_dataset, transformers = load_clintox(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Delaney':
        from deepchem.molnet import load_delaney
        tasks, all_dataset, transformers = load_delaney(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'HOPV':
        from deepchem.molnet import load_hopv
        tasks, all_dataset, transformers = load_hopv(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'SIDER':
        from deepchem.molnet import load_sider
        tasks, all_dataset, transformers = load_sider(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Lipo':
        from deepchem.molnet import load_lipo
        tasks, all_dataset, transformers = load_lipo(
            featurizer=featurizer, splitter=splitter, reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
    return args, tasks, all_dataset, transformers
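# Usage sketch (not part of the original function): `args` is assumed to be a
# plain dict carrying 'dataset' and 'featurizer' keys, which is all that
# load_dataset above reads. Dataset/featurizer values here are illustrative.
if __name__ == '__main__':
    args = {'dataset': 'Delaney', 'featurizer': 'GC'}
    args, tasks, all_dataset, transformers = load_dataset(args)
    train_dataset, valid_dataset, test_dataset = all_dataset
    print('Tasks: {}'.format(tasks))
    print('Train/valid/test sizes: {} / {} / {}'.format(
        len(train_dataset), len(valid_dataset), len(test_dataset)))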
"""
Script that trains multitask models on HOPV dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv

# Only for debug!
np.random.seed(123)

# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

n_layers = 1
n_bypass_layers = 1
nb_epoch = 25

# NOTE: the layer sizes, bypass sizes, and dropout rates below are
# illustrative choices; only n_layers and n_bypass_layers come from above.
model = dc.models.RobustMultitaskRegressor(
    len(hopv_tasks),
    n_features,
    layer_sizes=[500] * n_layers,
    bypass_layer_sizes=[50] * n_bypass_layers,
    dropouts=[.25] * n_layers,
    bypass_dropouts=[.25] * n_bypass_layers)
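# Training/evaluation sketch (assumed continuation of the script above),
# using the standard DeepChem fit/evaluate calls with the `nb_epoch` and
# `metric` already defined.
model.fit(train_dataset, nb_epoch=nb_epoch)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)

print("Train scores")
print(train_scores)
print("Validation scores")
print(valid_scores)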
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
from models import GraphConvModel

np.random.seed(123)
import tensorflow as tf

tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_hopv

# Load HOPV dataset
hopv_tasks, hopv_datasets, transformers = load_hopv(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 50
model = GraphConvModel(
    len(hopv_tasks), batch_size=batch_size, mode='regression')
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_hopv

# Only for debug!
np.random.seed(123)

# Load HOPV dataset
n_features = 1024
hopv_tasks, hopv_datasets, transformers = load_hopv()
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

model = dc.models.ProgressiveMultitaskRegressor(
    len(hopv_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001)
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
from deepchem.models import GraphConvModel

np.random.seed(123)
import tensorflow as tf

tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_hopv

# Load HOPV dataset
hopv_tasks, hopv_datasets, transformers = load_hopv(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = hopv_datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean, mode="regression"),
    dc.metrics.Metric(
        dc.metrics.mean_absolute_error, np.mean, mode="regression")
]

# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 50
model = GraphConvModel(
    len(hopv_tasks), batch_size=batch_size, mode='regression')
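# Training/evaluation sketch (assumed continuation of the graph-conv script
# above; the epoch count is illustrative).
model.fit(train_dataset, nb_epoch=25)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)

print("Train scores")
print(train_scores)
print("Validation scores")
print(valid_scores)
print("Test scores")
print(test_scores)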
def load_dataset(args):
    splitter = 'scaffold'
    if args['featurizer'] == 'ECFP':
        featurizer = 'ECFP'
    elif args['featurizer'] == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    elif args['featurizer'] == 'AC':
        from deepchem.feat import AtomicConvFeaturizer
        featurizer = AtomicConvFeaturizer(
            frag1_num_atoms=100,
            frag2_num_atoms=1000,
            complex_num_atoms=1100,
            max_num_neighbors=12,
            neighbor_cutoff=4)
    else:
        raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

    if args['dataset'] == 'BACE_classification':
        from deepchem.molnet import load_bace_classification
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BBBP':
        from deepchem.molnet import load_bbbp
        tasks, all_dataset, transformers = load_bbbp(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BACE_regression':
        from deepchem.molnet import load_bace_regression
        tasks, all_dataset, transformers = load_bace_regression(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'ClinTox':
        from deepchem.molnet import load_clintox
        tasks, all_dataset, transformers = load_clintox(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Delaney':
        from deepchem.molnet import load_delaney
        tasks, all_dataset, transformers = load_delaney(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'HOPV':
        from deepchem.molnet import load_hopv
        tasks, all_dataset, transformers = load_hopv(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'SIDER':
        from deepchem.molnet import load_sider
        tasks, all_dataset, transformers = load_sider(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Lipo':
        from deepchem.molnet import load_lipo
        tasks, all_dataset, transformers = load_lipo(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'PDBbind':
        from deepchem.molnet import load_pdbbind
        tasks, all_dataset, transformers = load_pdbbind(
            featurizer=featurizer,
            save_dir='.',
            data_dir='.',
            splitter='random',
            pocket=True,
            set_name='core',  # refined
            reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
    return args, tasks, all_dataset, transformers
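# Sketch of how the 'AC' / 'PDBbind' branch above could be consumed with
# dc.models.AtomicConvModel. The atom counts are assumed to need to mirror
# the AtomicConvFeaturizer settings used in load_dataset; batch size and
# epoch count are illustrative.
import deepchem as dc

args = {'dataset': 'PDBbind', 'featurizer': 'AC'}
args, tasks, all_dataset, transformers = load_dataset(args)
train_dataset, valid_dataset, test_dataset = all_dataset

model = dc.models.AtomicConvModel(
    n_tasks=len(tasks),
    frag1_num_atoms=100,   # mirrors AtomicConvFeaturizer(frag1_num_atoms=100)
    frag2_num_atoms=1000,  # mirrors AtomicConvFeaturizer(frag2_num_atoms=1000)
    complex_num_atoms=1100,
    max_num_neighbors=12,
    batch_size=16)
model.fit(train_dataset, nb_epoch=1)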