def test_gat_regression():
    """GATModel regression: overfit a tiny random-label set, then smoke-test
    a deliberately small model on the Delaney (ESOL) MoleculeNet dataset."""
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test: the model should drive train-set MAE below 0.5
    model.fit(dataset, nb_epoch=500)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    # Use the directly-imported GATModel, consistent with the first model
    # above (the original referenced dc.models.GATModel here).
    model = GATModel(mode='regression',
                     n_tasks=len(tasks),
                     graph_attention_layers=[2],
                     n_attention_heads=1,
                     residual=False,
                     predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
def test_mpnn_regression():
    """MPNNModel regression: overfit a tiny random-label set, then run one
    epoch of a minimal model on the Delaney (ESOL) MoleculeNet dataset."""
    # MPNN consumes bond information, so edge features must be enabled.
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # Overfit check: MAE on the training set itself should drop below 0.5.
    model = MPNNModel(mode='regression',
                      n_tasks=len(tasks),
                      learning_rate=0.0005)
    model.fit(dataset, nb_epoch=400)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # Smoke test on a small MoleculeNet dataset with a tiny architecture.
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='regression',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
def get_dataset(mode='classification', featurizer='GraphConv', num_tasks=2):
    """Build a small random-label dataset for overfit tests.

    Loads the first 20 molecules of BACE (classification) or Delaney
    (regression), replaces the labels with random values, pads the task list
    to ``num_tasks`` and returns a metric matching the mode.
    """
    data_points = 20
    is_classification = mode == 'classification'

    if is_classification:
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer, reload=False)
    else:
        tasks, all_dataset, transformers = load_delaney(featurizer,
                                                        reload=False)
    train = all_dataset[0]

    # Pad the task list with dummies so multi-task code paths are exercised.
    tasks.extend("random_task" for _ in range(1, num_tasks))
    w = np.ones(shape=(data_points, len(tasks)))

    if is_classification:
        y = np.random.randint(0, 2, size=(data_points, len(tasks)))
        metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                                   np.mean,
                                   mode="classification")
    else:
        y = np.random.normal(size=(data_points, len(tasks)))
        metric = dc.metrics.Metric(dc.metrics.mean_absolute_error,
                                   mode="regression")

    ds = NumpyDataset(train.X[:data_points], y, w, train.ids[:data_points])
    return tasks, ds, transformers, metric
def get_dataset(self, mode='classification', featurizer='GraphConv',
                num_tasks=2):
    """Build a 10-molecule random-label dataset for overfit tests.

    Loads BACE (classification) or Delaney (regression), replaces labels
    with random values, pads the task list to ``num_tasks`` and returns a
    metric matching the mode.
    """
    data_points = 10
    is_classification = mode == 'classification'

    if is_classification:
        tasks, all_dataset, transformers = load_bace_classification(featurizer)
    else:
        tasks, all_dataset, transformers = load_delaney(featurizer)
    train = all_dataset[0]

    # Pad the task list with dummies so multi-task code paths are exercised.
    tasks.extend("random_task" for _ in range(1, num_tasks))
    w = np.ones(shape=(data_points, len(tasks)))

    if is_classification:
        y = np.random.randint(0, 2, size=(data_points, len(tasks)))
        metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                                   np.mean,
                                   mode="classification")
    else:
        y = np.random.normal(size=(data_points, len(tasks)))
        metric = dc.metrics.Metric(dc.metrics.mean_absolute_error,
                                   mode="regression")

    ds = NumpyDataset(train.X[:data_points], y, w, train.ids[:data_points])
    return tasks, ds, transformers, metric
def test_attentivefp_regression():
    """AttentiveFPModel regression: overfit a tiny random-label set, then run
    one epoch of a minimal model on the Delaney (ESOL) MoleculeNet dataset."""
    # AttentiveFP needs bond features, so edges are enabled on the featurizer.
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # Overfit check: MAE on the training set itself should drop below 0.5.
    model = AttentiveFPModel(mode='regression',
                             n_tasks=len(tasks),
                             batch_size=10)
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # Smoke test on a small MoleculeNet dataset with a tiny architecture.
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = AttentiveFPModel(mode='regression',
                             n_tasks=len(tasks),
                             num_layers=1,
                             num_timesteps=1,
                             graph_feat_size=2)
    model.fit(train_set, nb_epoch=1)
def test_featurization_transformer():
    """FeaturizationTransformer turns raw molecules into circular fingerprints
    of the requested width while leaving the labels untouched."""
    fp_size = 2048
    _, all_dataset, _ = load_delaney('Raw')
    train = all_dataset[0]

    transformer = FeaturizationTransformer(
        dataset=train, featurizer=dc.feat.CircularFingerprint(size=fp_size))
    transformed = transformer.transform(train)

    # y is preserved; X becomes a fingerprint vector of length fp_size.
    assert transformed.y.shape == train.y.shape
    assert transformed.X.shape[-1] == fp_size
def test_featurization_transformer(self):
    """FeaturizationTransformer turns raw molecules into circular fingerprints
    of the requested width while leaving the labels untouched."""
    fp_size = 2048
    _, all_dataset, _ = load_delaney('Raw')
    train = all_dataset[0]

    transformer = FeaturizationTransformer(
        transform_X=True,
        dataset=train,
        featurizer=dc.feat.CircularFingerprint(size=fp_size))
    transformed = transformer.transform(train)

    # y is preserved; X becomes a fingerprint vector of length fp_size.
    self.assertEqual(transformed.y.shape, train.y.shape)
    self.assertEqual(transformed.X.shape[-1], fp_size)
def load_dataset(args):
    """Load the MoleculeNet dataset named in ``args`` with a scaffold split.

    Parameters
    ----------
    args: dict
        Must contain 'featurizer' ('ECFP' or 'GC') and 'dataset' (one of the
        names handled below).

    Returns
    -------
    tuple
        (args, tasks, all_dataset, transformers) as produced by the
        corresponding ``deepchem.molnet`` loader.

    Raises
    ------
    ValueError
        If the featurizer or dataset name is not recognized.
    """
    splitter = 'scaffold'
    if args['featurizer'] == 'ECFP':
        featurizer = 'ECFP'
    elif args['featurizer'] == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    else:
        # Fail fast with a clear message instead of a NameError on
        # `featurizer` when the dataset loader is called below.
        raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

    # Loaders are imported lazily so only the requested dataset's
    # dependencies are pulled in.
    if args['dataset'] == 'BACE_classification':
        from deepchem.molnet import load_bace_classification
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BBBP':
        from deepchem.molnet import load_bbbp
        tasks, all_dataset, transformers = load_bbbp(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BACE_regression':
        from deepchem.molnet import load_bace_regression
        tasks, all_dataset, transformers = load_bace_regression(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'ClinTox':
        from deepchem.molnet import load_clintox
        tasks, all_dataset, transformers = load_clintox(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Delaney':
        from deepchem.molnet import load_delaney
        tasks, all_dataset, transformers = load_delaney(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'HOPV':
        from deepchem.molnet import load_hopv
        tasks, all_dataset, transformers = load_hopv(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'SIDER':
        from deepchem.molnet import load_sider
        tasks, all_dataset, transformers = load_sider(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Lipo':
        from deepchem.molnet import load_lipo
        tasks, all_dataset, transformers = load_lipo(
            featurizer=featurizer, splitter=splitter, reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    return args, tasks, all_dataset, transformers
def test_pagtn_regression():
    """PagtnModel regression: overfit a tiny random-label set, then run one
    epoch on the Delaney (ESOL) MoleculeNet dataset."""
    # load datasets
    featurizer = PagtnMolGraphFeaturizer(max_length=5)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = PagtnModel(mode='regression', n_tasks=n_tasks, batch_size=16)

    # overfit test: the model should drive train-set MAE below 0.65
    model.fit(dataset, nb_epoch=150)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.65

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    # Bug fix: size the model from the freshly loaded Delaney task list
    # (len(tasks)), not the stale n_tasks computed from the padded synthetic
    # dataset above — matching the sibling GAT/MPNN/AttentiveFP tests.
    model = PagtnModel(mode='regression', n_tasks=len(tasks), batch_size=16)
    model.fit(train_set, nb_epoch=1)
"""Example script: set up a MultitaskRegressor on the Delaney (ESOL) dataset."""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import shutil

import numpy as np
import deepchem as dc
from deepchem.molnet import load_delaney

# Only for debug! Fixed seed makes runs reproducible.
np.random.seed(123)

# Load Delaney dataset (ECFP featurization by default -> 1024-bit vectors).
n_features = 1024
delaney_tasks, delaney_datasets, transformers = load_delaney()
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models: Pearson R^2 averaged over tasks is the evaluation metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

model = dc.models.MultitaskRegressor(
    len(delaney_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
    verbosity="high")

# Fit trained model
"""Example script: train a GraphConvModel on the Delaney (ESOL) dataset."""
from __future__ import division
from __future__ import unicode_literals

import numpy as np
from deepchem.models import GraphConvModel

# Fixed seeds for reproducibility.
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)  # NOTE(review): TF1-style API; TF2 uses tf.random.set_seed
import deepchem as dc
from deepchem.molnet import load_delaney

# Load Delaney dataset with graph-convolution featurization and index split.
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models: Pearson R^2 averaged over tasks is the evaluation metric.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 128
model = GraphConvModel(
    len(delaney_tasks), batch_size=batch_size, mode='regression')

# Fit trained model
model.fit(train_dataset, nb_epoch=20)
def load_dataset(args):
    """Load the MoleculeNet dataset named in ``args`` with a scaffold split.

    Parameters
    ----------
    args: dict
        Must contain 'featurizer' ('ECFP', 'GC' or 'AC') and 'dataset' (one
        of the names handled below).

    Returns
    -------
    tuple
        (args, tasks, all_dataset, transformers) as produced by the
        corresponding ``deepchem.molnet`` loader.

    Raises
    ------
    ValueError
        If the featurizer or dataset name is not recognized.
    """
    splitter = 'scaffold'
    if args['featurizer'] == 'ECFP':
        featurizer = 'ECFP'
    elif args['featurizer'] == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    elif args['featurizer'] == 'AC':
        from deepchem.feat import AtomicConvFeaturizer
        featurizer = AtomicConvFeaturizer(frag1_num_atoms=100,
                                          frag2_num_atoms=1000,
                                          complex_num_atoms=1100,
                                          max_num_neighbors=12,
                                          neighbor_cutoff=4)
    else:
        # Fail fast with a clear message instead of a NameError on
        # `featurizer` when the dataset loader is called below.
        raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

    # Loaders are imported lazily so only the requested dataset's
    # dependencies are pulled in.
    if args['dataset'] == 'BACE_classification':
        from deepchem.molnet import load_bace_classification
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BBBP':
        from deepchem.molnet import load_bbbp
        tasks, all_dataset, transformers = load_bbbp(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BACE_regression':
        from deepchem.molnet import load_bace_regression
        tasks, all_dataset, transformers = load_bace_regression(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'ClinTox':
        from deepchem.molnet import load_clintox
        tasks, all_dataset, transformers = load_clintox(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Delaney':
        from deepchem.molnet import load_delaney
        tasks, all_dataset, transformers = load_delaney(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'HOPV':
        from deepchem.molnet import load_hopv
        tasks, all_dataset, transformers = load_hopv(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'SIDER':
        from deepchem.molnet import load_sider
        tasks, all_dataset, transformers = load_sider(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'Lipo':
        from deepchem.molnet import load_lipo
        tasks, all_dataset, transformers = load_lipo(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'PDBbind':
        from deepchem.molnet import load_pdbbind
        tasks, all_dataset, transformers = load_pdbbind(
            featurizer=featurizer,
            save_dir='.',
            data_dir='.',
            splitter='random',
            pocket=True,
            set_name='core',  # 'refined' selects the larger subset
            reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    return args, tasks, all_dataset, transformers