Example #1
def test_gcn_reload():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model_dir = tempfile.mkdtemp()
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     model_dir=model_dir,
                     batch_size=10,
                     learning_rate=0.0003)

    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    reloaded_model = GCNModel(mode='classification',
                              n_tasks=n_tasks,
                              number_atom_features=30,
                              model_dir=model_dir,
                              batch_size=10,
                              learning_rate=0.0003)
    reloaded_model.restore()

    pred_mols = ["CCCC", "CCCCCO", "CCCCC"]
    X_pred = featurizer(pred_mols)
    random_dataset = dc.data.NumpyDataset(X_pred)
    original_pred = model.predict(random_dataset)
    reload_pred = reloaded_model.predict(random_dataset)
    assert np.all(original_pred == reload_pred)
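All of the snippets in this listing come from DeepChem's graph-model test suite and omit their module-level setup. A minimal sketch of the imports they rely on follows; the exact home of the get_dataset helper (a local test fixture returning tasks, dataset, transformers, and a metric) is an assumption, so it is left as a commented placeholder:

import tempfile

import numpy as np
import deepchem as dc
from deepchem.feat import MolGraphConvFeaturizer, PagtnMolGraphFeaturizer
from deepchem.models import (AttentiveFPModel, GATModel, GCNModel, MPNNModel,
                             PagtnModel, losses)

# get_dataset is a local helper in DeepChem's tests, not a public API; the
# path below is a guess at where your copy defines it:
# from deepchem.models.tests.test_graph_models import get_dataset

# The JaxModel examples further down additionally assume:
# import jax
# import jax.numpy as jnp
# import haiku as hk
# import optax
# from deepchem.models import JaxModel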
Example #2
def test_attentivefp_regression():
  # load datasets
  featurizer = MolGraphConvFeaturizer(use_edges=True)
  tasks, dataset, transformers, metric = get_dataset(
      'regression', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean_absolute_error'] < 0.5

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_delaney

  tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = AttentiveFPModel(
      mode='regression',
      n_tasks=len(tasks),
      num_layers=1,
      num_timesteps=1,
      graph_feat_size=2)
  model.fit(train_set, nb_epoch=1)
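Note the two-phase structure used here and in several examples below: the overfit test trains on a tiny fixture dataset until the metric clears a threshold, while the MoleculeNet check deliberately shrinks the architecture (num_layers=1, num_timesteps=1, graph_feat_size=2) so that a single epoch over real data finishes quickly; it exercises the training loop rather than chasing a meaningful score.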
Example #3
def test_gat_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GATModel(mode='classification',
                               n_tasks=len(tasks),
                               graph_attention_layers=[2],
                               n_attention_heads=1,
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
Example #4
def test_mpnn_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='classification',
                      n_tasks=n_tasks,
                      learning_rate=0.0005)

    # overfit test
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='classification',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
Example #5
def test_gcn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.003)

    # overfit test
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GCNModel(n_tasks=len(tasks),
                               graph_conv_layers=[2],
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
Example #6
def test_attentivefp_classification():
  # load datasets
  featurizer = MolGraphConvFeaturizer(use_edges=True)
  tasks, dataset, transformers, metric = get_dataset(
      'classification', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = AttentiveFPModel(
      mode='classification',
      n_tasks=n_tasks,
      batch_size=10,
      learning_rate=0.001)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.85

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_bace_classification

  tasks, all_dataset, transformers = load_bace_classification(
      featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = AttentiveFPModel(
      mode='classification',
      n_tasks=len(tasks),
      num_layers=1,
      num_timesteps=1,
      graph_feat_size=2)
  model.fit(train_set, nb_epoch=1)
Example #7
def test_jax_model_for_regression():
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer='ECFP')

    # sample network
    def forward_model(x):
        net = hk.nets.MLP([512, 256, 128, 2])
        return net(x)

    # despite its name, this is a mean L2 (squared-error) loss built from
    # optax.l2_loss; the sample weights w are accepted but unused
    def rms_loss(pred, tar, w):
        return jnp.mean(optax.l2_loss(pred, tar))

    # Model Initialization
    params_init, forward_fn = hk.transform(forward_model)
    rng = jax.random.PRNGKey(500)
    inputs, _, _, _ = next(iter(dataset.iterbatches(batch_size=256)))
    modified_inputs = jnp.array(
        [x.astype(np.float32) if x.dtype == np.float64 else x for x in inputs])
    params = params_init(rng, modified_inputs)

    # Loss Function
    criterion = rms_loss

    # Fit and evaluate with JaxModel
    j_m = JaxModel(forward_fn,
                   params,
                   criterion,
                   batch_size=256,
                   learning_rate=0.001,
                   log_frequency=2)
    _ = j_m.fit(dataset, nb_epochs=25, deterministic=True)
    scores = j_m.evaluate(dataset, [metric])
    assert scores[metric.name] < 0.5
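The Haiku idiom above generalizes: hk.transform turns a module-building function into a pure (init, apply) pair, and the parameters are shaped by tracing one sample batch. A standalone sketch of just that pattern, independent of DeepChem (the layer sizes and input shape are illustrative assumptions):

import haiku as hk
import jax
import jax.numpy as jnp

def forward(x):
    # same MLP pattern as forward_model above, just smaller
    return hk.nets.MLP([8, 1])(x)

init_fn, apply_fn = hk.transform(forward)
rng = jax.random.PRNGKey(0)
sample = jnp.ones((4, 16))           # batch of 4 examples with 16 features
params = init_fn(rng, sample)        # parameters are shaped from the sample
out = apply_fn(params, rng, sample)  # pure forward pass: (params, rng, inputs)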
Example #8
def test_mpnn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=400)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='regression',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
Example #9
def test_pagtn_reload():
  # load datasets
  featurizer = PagtnMolGraphFeaturizer(max_length=5)
  tasks, dataset, transformers, metric = get_dataset(
      'classification', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model_dir = tempfile.mkdtemp()
  model = PagtnModel(
      mode='classification',
      n_tasks=n_tasks,
      model_dir=model_dir,
      batch_size=16)

  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.85

  reloaded_model = PagtnModel(
      mode='classification',
      n_tasks=n_tasks,
      model_dir=model_dir,
      batch_size=16)
  reloaded_model.restore()

  pred_mols = ["CCCC", "CCCCCO", "CCCCC"]
  X_pred = featurizer(pred_mols)
  random_dataset = dc.data.NumpyDataset(X_pred)
  original_pred = model.predict(random_dataset)
  reload_pred = reloaded_model.predict(random_dataset)
  assert np.all(original_pred == reload_pred)
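This reload test follows the same recipe as Example #1: build a second model with identical constructor arguments pointing at the same model_dir, call restore(), and require bit-for-bit identical predictions via np.all. A mismatch in architecture arguments between the two constructors would typically make restore() fail, since the checkpoint weights no longer fit the rebuilt network.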
Example #10
def test_attentivefp_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #11
def test_gat_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #12
def test_gcn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #13
def test_attentivefp_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='classification',
                             n_tasks=n_tasks,
                             batch_size=10,
                             learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #14
def test_gat_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(n_tasks=n_tasks,
                     loss=losses.L2Loss(),
                     batch_size=4,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # TODO: check whether this assertion is correct
    assert scores['mean_absolute_error'] < 1.0
Example #15
def test_gat_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=150)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #16
def test_gcn_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.0003)

    # overfit test
    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #17
def test_jax_model_for_classification():
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer='ECFP')

    # sample network
    class Encoder(hk.Module):
        def __init__(self, output_size: int = 2):
            super().__init__()
            self._network = hk.nets.MLP([512, 256, 128, output_size])

        def __call__(self, x: jnp.ndarray):
            x = self._network(x)
            return x, jax.nn.softmax(x)

    # despite its name, this is softmax cross-entropy on the raw logits
    # (pred[0]); targets are cast to float32 and the weights w are unused
    def bce_loss(pred, tar, w):
        tar = jnp.array([
            x.astype(np.float32) if x.dtype != np.float32 else x for x in tar
        ])
        return jnp.mean(optax.softmax_cross_entropy(pred[0], tar))

    # Model Initialization
    params_init, forward_fn = hk.transform(lambda x: Encoder()(x))  # noqa
    rng = jax.random.PRNGKey(500)
    inputs, _, _, _ = next(iter(dataset.iterbatches(batch_size=256)))
    modified_inputs = jnp.array(
        [x.astype(np.float32) if x.dtype == np.float64 else x for x in inputs])
    params = params_init(rng, modified_inputs)

    # Loss Function
    criterion = bce_loss

    # Fit and evaluate with JaxModel
    j_m = JaxModel(forward_fn,
                   params,
                   criterion,
                   output_types=['loss', 'prediction'],
                   batch_size=256,
                   learning_rate=0.001,
                   log_frequency=2)
    _ = j_m.fit(dataset, nb_epochs=25, deterministic=True)
    scores = j_m.evaluate(dataset, [metric])
    assert scores[metric.name] > 0.8
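Here output_types=['loss', 'prediction'] tells JaxModel how to route the tuple returned by Encoder.__call__: the raw logits (pred[0]) feed the softmax cross-entropy loss, while the softmax output is what predict and evaluate consume.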
Example #18
def test_pagtn_regression():
  # load datasets
  featurizer = PagtnMolGraphFeaturizer(max_length=5)
  tasks, dataset, transformers, metric = get_dataset(
      'regression', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = PagtnModel(mode='regression', n_tasks=n_tasks, batch_size=16)

  # overfit test
  model.fit(dataset, nb_epoch=150)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean_absolute_error'] < 0.65

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_delaney

  tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = PagtnModel(mode='regression', n_tasks=len(tasks), batch_size=16)
  model.fit(train_set, nb_epoch=1)
Example #19
def test_pagtn_classification():
  # load datasets
  featurizer = PagtnMolGraphFeaturizer(max_length=5)
  tasks, dataset, transformers, metric = get_dataset(
      'classification', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = PagtnModel(mode='classification', n_tasks=n_tasks, batch_size=16)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.85

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_bace_classification

  tasks, all_dataset, transformers = load_bace_classification(
      featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = PagtnModel(mode='classification', n_tasks=len(tasks), batch_size=16)
  model.fit(train_set, nb_epoch=1)