예제 #1
0
def test_gat_regression():
    """Overfit a GATModel on the get_dataset regression set, then train briefly on Delaney."""
    graph_featurizer = MolGraphConvFeaturizer()
    task_names, small_set, small_transformers, mae_metric = get_dataset(
        'regression', featurizer=graph_featurizer)

    # Stage 1 (overfit check): the model must reach a low training error.
    overfit_settings = {
        'mode': 'regression',
        'n_tasks': len(task_names),
        'number_atom_features': 30,
        'batch_size': 10,
        'learning_rate': 0.001,
    }
    overfit_model = GATModel(**overfit_settings)
    overfit_model.fit(small_set, nb_epoch=500)
    results = overfit_model.evaluate(small_set, [mae_metric],
                                     small_transformers)
    assert results['mean_absolute_error'] < 0.5

    # Stage 2: a single epoch on a MoleculeNet dataset with a very small
    # network configuration.
    from deepchem.molnet import load_delaney

    delaney_tasks, delaney_splits, _ = load_delaney(
        featurizer=graph_featurizer)
    delaney_train, _, _ = delaney_splits
    small_network = {
        'mode': 'regression',
        'n_tasks': len(delaney_tasks),
        'graph_attention_layers': [2],
        'n_attention_heads': 1,
        'residual': False,
        'predictor_hidden_feats': 2,
    }
    delaney_model = dc.models.GATModel(**small_network)
    delaney_model.fit(delaney_train, nb_epoch=1)
예제 #2
0
def test_mpnn_regression():
    """Overfit an MPNNModel on the get_dataset regression set, then train briefly on Delaney."""
    edge_featurizer = MolGraphConvFeaturizer(use_edges=True)
    task_names, small_set, small_transformers, mae_metric = get_dataset(
        'regression', featurizer=edge_featurizer)

    # Stage 1 (overfit check): the model must reach a low training error.
    overfit_model = MPNNModel(mode='regression',
                              n_tasks=len(task_names),
                              learning_rate=0.0005)
    overfit_model.fit(small_set, nb_epoch=400)
    results = overfit_model.evaluate(small_set, [mae_metric],
                                     small_transformers)
    assert results['mean_absolute_error'] < 0.5

    # Stage 2: a single epoch on a MoleculeNet dataset with a very small
    # network configuration.
    from deepchem.molnet import load_delaney

    delaney_tasks, delaney_splits, _ = load_delaney(featurizer=edge_featurizer)
    delaney_train, _, _ = delaney_splits
    small_network = {
        'mode': 'regression',
        'n_tasks': len(delaney_tasks),
        'node_out_feats': 2,
        'edge_hidden_feats': 2,
        'num_step_message_passing': 1,
        'num_step_set2set': 1,
        'num_layer_set2set': 1,
    }
    delaney_model = MPNNModel(**small_network)
    delaney_model.fit(delaney_train, nb_epoch=1)
예제 #3
0
def get_dataset(mode='classification', featurizer='GraphConv', num_tasks=2):
    """Build a 20-sample dataset with random labels for model tests.

    The features and ids come from the first 20 entries of the training
    split of BACE-classification (``mode == 'classification'``) or Delaney
    (any other mode). Labels are drawn at random and all weights are one.

    Parameters
    ----------
    mode : str
        ``'classification'`` selects BACE and binary labels; anything else
        selects Delaney and normally distributed labels.
    featurizer
        Passed straight through to the loader.
    num_tasks : int
        One ``"random_task"`` name is appended to the loader's task list for
        every integer in ``range(1, num_tasks)``.

    Returns
    -------
    tuple
        ``(tasks, dataset, transformers, metric)``.
    """
    n_samples = 20
    is_classification = mode == 'classification'

    loader = load_bace_classification if is_classification else load_delaney
    tasks, splits, transformers = loader(featurizer, reload=False)
    train, _, _ = splits

    # Pad the task list in place with placeholder task names.
    tasks.extend("random_task" for _ in range(1, num_tasks))

    label_shape = (n_samples, len(tasks))
    w = np.ones(shape=label_shape)

    if is_classification:
        y = np.random.randint(0, 2, size=label_shape)
        metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                                   np.mean,
                                   mode="classification")
    else:
        y = np.random.normal(size=label_shape)
        metric = dc.metrics.Metric(dc.metrics.mean_absolute_error,
                                   mode="regression")

    features = train.X[:n_samples]
    sample_ids = train.ids[:n_samples]
    return tasks, NumpyDataset(features, y, w, sample_ids), transformers, metric
예제 #4
0
  def get_dataset(self,
                  mode='classification',
                  featurizer='GraphConv',
                  num_tasks=2):
    """Build a 10-sample dataset with random labels for model tests.

    Features and ids are the first 10 entries of the training split of
    BACE-classification (``mode == 'classification'``) or Delaney (any other
    mode). Labels are random and every weight is one. One ``"random_task"``
    name is appended to the loader's task list for each integer in
    ``range(1, num_tasks)``.

    Returns the tuple ``(tasks, dataset, transformers, metric)``.
    """
    n_samples = 10
    is_classification = mode == 'classification'

    loader = load_bace_classification if is_classification else load_delaney
    tasks, splits, transformers = loader(featurizer)
    train, _, _ = splits

    # Pad the task list in place with placeholder task names.
    tasks.extend("random_task" for _ in range(1, num_tasks))

    label_shape = (n_samples, len(tasks))
    w = np.ones(shape=label_shape)

    if is_classification:
      y = np.random.randint(0, 2, size=label_shape)
      metric = dc.metrics.Metric(
          dc.metrics.roc_auc_score, np.mean, mode="classification")
    else:
      y = np.random.normal(size=label_shape)
      metric = dc.metrics.Metric(
          dc.metrics.mean_absolute_error, mode="regression")

    features = train.X[:n_samples]
    sample_ids = train.ids[:n_samples]
    return tasks, NumpyDataset(features, y, w, sample_ids), transformers, metric
예제 #5
0
def test_attentivefp_regression():
  """Overfit an AttentiveFPModel on the get_dataset regression set, then train briefly on Delaney."""
  edge_featurizer = MolGraphConvFeaturizer(use_edges=True)
  task_names, small_set, small_transformers, mae_metric = get_dataset(
      'regression', featurizer=edge_featurizer)

  # Stage 1 (overfit check): the model must reach a low training error.
  overfit_model = AttentiveFPModel(
      mode='regression', n_tasks=len(task_names), batch_size=10)
  overfit_model.fit(small_set, nb_epoch=100)
  results = overfit_model.evaluate(small_set, [mae_metric], small_transformers)
  assert results['mean_absolute_error'] < 0.5

  # Stage 2: a single epoch on a MoleculeNet dataset with a very small
  # network configuration.
  from deepchem.molnet import load_delaney

  delaney_tasks, delaney_splits, _ = load_delaney(featurizer=edge_featurizer)
  delaney_train, _, _ = delaney_splits
  small_network = {
      'mode': 'regression',
      'n_tasks': len(delaney_tasks),
      'num_layers': 1,
      'num_timesteps': 1,
      'graph_feat_size': 2,
  }
  delaney_model = AttentiveFPModel(**small_network)
  delaney_model.fit(delaney_train, nb_epoch=1)
예제 #6
0
def test_featurization_transformer():
    """FeaturizationTransformer keeps y's shape and yields fingerprint-width X."""
    n_bits = 2048
    _, splits, _ = load_delaney('Raw')
    raw_train = splits[0]

    fingerprinter = dc.feat.CircularFingerprint(size=n_bits)
    featurize = FeaturizationTransformer(dataset=raw_train,
                                         featurizer=fingerprinter)
    featurized = featurize.transform(raw_train)

    # Labels are untouched; the last feature axis equals the fingerprint size.
    assert featurized.y.shape == raw_train.y.shape
    assert featurized.X.shape[-1] == n_bits
예제 #7
0
    def test_featurization_transformer(self):
        """FeaturizationTransformer keeps y's shape and yields fingerprint-width X."""
        n_bits = 2048
        _, splits, _ = load_delaney('Raw')
        raw_train = splits[0]

        settings = {
            'transform_X': True,
            'dataset': raw_train,
            'featurizer': dc.feat.CircularFingerprint(size=n_bits),
        }
        featurized = FeaturizationTransformer(**settings).transform(raw_train)

        # Labels are untouched; the last feature axis equals the fingerprint
        # size.
        self.assertEqual(featurized.y.shape, raw_train.y.shape)
        self.assertEqual(featurized.X.shape[-1], n_bits)
예제 #8
0
  def test_featurization_transformer(self):
    """FeaturizationTransformer keeps y's shape and yields fingerprint-width X."""
    n_bits = 2048
    _, splits, _ = load_delaney('Raw')
    raw_train = splits[0]

    settings = {
        'transform_X': True,
        'dataset': raw_train,
        'featurizer': dc.feat.CircularFingerprint(size=n_bits),
    }
    featurized = FeaturizationTransformer(**settings).transform(raw_train)

    # Labels are untouched; the last feature axis equals the fingerprint size.
    self.assertEqual(featurized.y.shape, raw_train.y.shape)
    self.assertEqual(featurized.X.shape[-1], n_bits)
예제 #9
0
def load_dataset(args):
  """Load the MoleculeNet dataset named in ``args`` with the requested featurizer.

  Args:
    args: Mapping with two required keys. ``'featurizer'`` must be ``'ECFP'``
      or ``'GC'``. ``'dataset'`` must be one of ``'BACE_classification'``,
      ``'BBBP'``, ``'BACE_regression'``, ``'ClinTox'``, ``'Delaney'``,
      ``'HOPV'``, ``'SIDER'`` or ``'Lipo'``.

  Returns:
    The tuple ``(args, tasks, all_dataset, transformers)``. ``args`` is the
    object that was passed in; the remaining three values are what the
    selected ``deepchem.molnet`` loader returned.

  Raises:
    ValueError: If the featurizer name or the dataset name is not recognised.
  """
  # Dataset name -> name of the matching loader function in deepchem.molnet.
  loader_names = {
      'BACE_classification': 'load_bace_classification',
      'BBBP': 'load_bbbp',
      'BACE_regression': 'load_bace_regression',
      'ClinTox': 'load_clintox',
      'Delaney': 'load_delaney',
      'HOPV': 'load_hopv',
      'SIDER': 'load_sider',
      'Lipo': 'load_lipo',
  }

  featurizer_name = args['featurizer']
  dataset_name = args['dataset']

  # Validate both selections before importing deepchem. Previously an unknown
  # featurizer left ``featurizer`` unbound and surfaced later as an
  # UnboundLocalError instead of a clear message.
  if featurizer_name not in ('ECFP', 'GC'):
    raise ValueError('Unexpected featurizer: {}'.format(featurizer_name))
  if dataset_name not in loader_names:
    raise ValueError('Unexpected dataset: {}'.format(dataset_name))

  if featurizer_name == 'GC':
    from deepchem.feat import MolGraphConvFeaturizer
    featurizer = MolGraphConvFeaturizer()
  else:
    # 'ECFP' is handed to the loader as a plain string.
    featurizer = 'ECFP'

  from deepchem import molnet
  loader = getattr(molnet, loader_names[dataset_name])
  tasks, all_dataset, transformers = loader(
      featurizer=featurizer, splitter='scaffold', reload=False)

  return args, tasks, all_dataset, transformers
예제 #10
0
def test_pagtn_regression():
  """Overfit a PagtnModel on the get_dataset regression set, then train briefly on Delaney."""
  # load datasets
  featurizer = PagtnMolGraphFeaturizer(max_length=5)
  tasks, dataset, transformers, metric = get_dataset(
      'regression', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = PagtnModel(mode='regression', n_tasks=n_tasks, batch_size=16)

  # overfit test
  model.fit(dataset, nb_epoch=150)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean_absolute_error'] < 0.65

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_delaney

  tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
  train_set, _, _ = all_dataset
  # ``tasks`` was just reassigned by load_delaney, so size the model from the
  # new task list instead of reusing the count taken from the first dataset.
  model = PagtnModel(mode='regression', n_tasks=len(tasks), batch_size=16)
  model.fit(train_set, nb_epoch=1)
예제 #11
0
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import shutil
import numpy as np
import deepchem as dc
from deepchem.molnet import load_delaney

# Seed NumPy's global RNG so repeated runs draw the same random numbers
# (debugging aid only).
np.random.seed(123)

# Load the Delaney dataset with the loader's default settings and unpack the
# three splits it returns.
# NOTE(review): n_features presumably has to match the feature width produced
# by load_delaney()'s default featurizer - confirm.
n_features = 1024
delaney_tasks, delaney_datasets, transformers = load_delaney()
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Evaluation metric: Pearson R^2, combined with np.mean.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Multitask regressor with one output per Delaney task and a single hidden
# layer of 1000 units with dropout 0.25.
model = dc.models.MultitaskRegressor(
    len(delaney_tasks),
    n_features,
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
    verbosity="high")

# Fit trained model
예제 #12
0
from __future__ import division
from __future__ import unicode_literals

import numpy as np

from deepchem.models import GraphConvModel

# Seed NumPy's global RNG before the remaining imports run.
np.random.seed(123)
import tensorflow as tf

# Seed TensorFlow's RNG as well.
# NOTE(review): tf.set_random_seed looks like the TensorFlow 1.x spelling -
# confirm which TensorFlow version this script targets.
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_delaney

# Load the Delaney dataset with the 'GraphConv' featurizer and an index split,
# then unpack the three splits.
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Evaluation metric: Pearson R^2, combined with np.mean.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols (not referenced again in the visible part
# of this script).
n_feat = 75
# Batch size of models
batch_size = 128
# Regression-mode graph convolution model with one output per Delaney task.
model = GraphConvModel(
    len(delaney_tasks), batch_size=batch_size, mode='regression')

# Train on the training split for 20 epochs.
model.fit(train_dataset, nb_epoch=20)
예제 #13
0
def load_dataset(args):
    """Load the dataset named in ``args`` with the requested featurizer.

    Args:
        args: Mapping with two required keys. ``'featurizer'`` must be
            ``'ECFP'``, ``'GC'`` or ``'AC'``. ``'dataset'`` must be one of
            ``'BACE_classification'``, ``'BBBP'``, ``'BACE_regression'``,
            ``'ClinTox'``, ``'Delaney'``, ``'HOPV'``, ``'SIDER'``, ``'Lipo'``
            or ``'PDBbind'``.

    Returns:
        The tuple ``(args, tasks, all_dataset, transformers)``. ``args`` is
        the object that was passed in; the remaining three values are what
        the selected ``deepchem.molnet`` loader returned.

    Raises:
        ValueError: If the featurizer name or the dataset name is not
            recognised.
    """
    # Dataset name -> name of the matching loader function in deepchem.molnet.
    loader_names = {
        'BACE_classification': 'load_bace_classification',
        'BBBP': 'load_bbbp',
        'BACE_regression': 'load_bace_regression',
        'ClinTox': 'load_clintox',
        'Delaney': 'load_delaney',
        'HOPV': 'load_hopv',
        'SIDER': 'load_sider',
        'Lipo': 'load_lipo',
        'PDBbind': 'load_pdbbind',
    }

    featurizer_name = args['featurizer']
    dataset_name = args['dataset']

    # Validate both selections before importing deepchem. Previously an
    # unknown featurizer left ``featurizer`` unbound and surfaced later as an
    # UnboundLocalError instead of a clear message.
    if featurizer_name not in ('ECFP', 'GC', 'AC'):
        raise ValueError('Unexpected featurizer: {}'.format(featurizer_name))
    if dataset_name not in loader_names:
        raise ValueError('Unexpected dataset: {}'.format(dataset_name))

    if featurizer_name == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    elif featurizer_name == 'AC':
        from deepchem.feat import AtomicConvFeaturizer
        featurizer = AtomicConvFeaturizer(frag1_num_atoms=100,
                                          frag2_num_atoms=1000,
                                          complex_num_atoms=1100,
                                          max_num_neighbors=12,
                                          neighbor_cutoff=4)
    else:
        # 'ECFP' is handed to the loader as a plain string.
        featurizer = 'ECFP'

    if dataset_name == 'PDBbind':
        # PDBbind is loaded with its own fixed settings (random split, files
        # kept in the current directory) rather than the scaffold split.
        # The original author noted 'refined' next to set_name, presumably as
        # the alternative value - confirm against the loader.
        loader_kwargs = dict(featurizer=featurizer,
                             save_dir='.',
                             data_dir='.',
                             splitter='random',
                             pocket=True,
                             set_name='core',
                             reload=False)
    else:
        loader_kwargs = dict(featurizer=featurizer,
                             splitter='scaffold',
                             reload=False)

    from deepchem import molnet
    loader = getattr(molnet, loader_names[dataset_name])
    tasks, all_dataset, transformers = loader(**loader_kwargs)

    return args, tasks, all_dataset, transformers