Example #1
  def test_default_featurizer(self):
    smiles = ["C1=CC=CN=C1", "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"]
    featurizer = MolGraphConvFeaturizer()
    graph_feat = featurizer.featurize(smiles)
    assert len(graph_feat) == 2

    # assert "C1=CC=CN=C1"
    assert graph_feat[0].num_nodes == 6
    assert graph_feat[0].num_node_features == 30
    assert graph_feat[0].num_edges == 12

    # assert "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"
    assert graph_feat[1].num_nodes == 22
    assert graph_feat[1].num_node_features == 30
    assert graph_feat[1].num_edges == 44
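These snippets are excerpted from a test suite and omit their imports. Below is a minimal setup block with the imports the examples on this page appear to assume; in recent DeepChem releases the graph models live under deepchem.models, but exact module paths and the optional PyTorch/DGL dependencies may vary by version.

# Shared imports assumed by the examples on this page (hypothetical setup,
# not part of the original snippets). The DGL-based models require the
# PyTorch and DGL extras to be installed.
import tempfile

import numpy as np

import deepchem as dc
from deepchem.feat import MolGraphConvFeaturizer
from deepchem.models import AttentiveFPModel, GATModel, GCNModel, MPNNModel
# Example #18 below refers to a `losses` module; deepchem.models.losses matches.
from deepchem.models import losses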
Example #2
  def test_featurizer_with_use_partial_charge(self):
    smiles = ["C1=CC=CN=C1", "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"]
    featurizer = MolGraphConvFeaturizer(use_partial_charge=True)
    graph_feat = featurizer.featurize(smiles)
    assert len(graph_feat) == 2

    # assert "C1=CC=CN=C1"
    assert graph_feat[0].num_nodes == 6
    assert graph_feat[0].num_node_features == 31
    assert graph_feat[0].num_edges == 12

    # assert "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"
    assert graph_feat[1].num_nodes == 22
    assert graph_feat[1].num_node_features == 31
    assert graph_feat[1].num_edges == 44
Example #3
def test_mpnn_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='classification',
                      n_tasks=n_tasks,
                      learning_rate=0.0005)

    # overfit test
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='classification',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
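Examples #3 onward also call a get_dataset helper that is not shown on this page. The sketch below is a hypothetical stand-in that builds a tiny in-memory dataset and returns the same signature and metric names the tests assert on; the helper in the original test suite may be implemented differently.

def get_dataset(mode, featurizer, num_tasks=1):
    # Hypothetical helper: featurize a handful of SMILES and attach small
    # synthetic labels so a model can overfit them in the tests above.
    np.random.seed(123)
    smiles = ["C1CCC1", "CCC", "C1=CC=CN=C1", "CCCCCO"]
    X = featurizer.featurize(smiles)
    tasks = ['task%d' % i for i in range(num_tasks)]
    if mode == 'classification':
        # Alternating 0/1 labels keep both classes present for ROC AUC.
        y = (np.arange(len(smiles)) % 2).reshape(-1, 1) * np.ones((1, num_tasks))
        # Metric name resolves to 'mean-roc_auc_score', as asserted above.
        metric = dc.metrics.Metric(
            dc.metrics.roc_auc_score, np.mean, mode='classification')
    else:
        y = np.random.normal(size=(len(smiles), num_tasks))
        # Metric name resolves to 'mean_absolute_error'.
        metric = dc.metrics.Metric(
            dc.metrics.mean_absolute_error, mode='regression')
    dataset = dc.data.NumpyDataset(X, y)
    transformers = []
    return tasks, dataset, transformers, metric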
Example #4
def test_attentivefp_classification():
  # load datasets
  featurizer = MolGraphConvFeaturizer(use_edges=True)
  tasks, dataset, transformers, metric = get_dataset(
      'classification', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = AttentiveFPModel(
      mode='classification',
      n_tasks=n_tasks,
      batch_size=10,
      learning_rate=0.001)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.85

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_bace_classification

  tasks, all_dataset, transformers = load_bace_classification(
      featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = AttentiveFPModel(
      mode='classification',
      n_tasks=len(tasks),
      num_layers=1,
      num_timesteps=1,
      graph_feat_size=2)
  model.fit(train_set, nb_epoch=1)
Example #5
def test_mpnn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=400)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='regression',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
Example #6
def test_gcn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.003)

    # overfit test
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GCNModel(n_tasks=len(tasks),
                               graph_conv_layers=[2],
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
Example #7
def test_gat_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GATModel(mode='classification',
                               n_tasks=len(tasks),
                               graph_attention_layers=[2],
                               n_attention_heads=1,
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
Example #8
def test_gcn_reload():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model_dir = tempfile.mkdtemp()
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     model_dir=model_dir,
                     batch_size=10,
                     learning_rate=0.0003)

    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    reloaded_model = GCNModel(mode='classification',
                              n_tasks=n_tasks,
                              number_atom_features=30,
                              model_dir=model_dir,
                              batch_size=10,
                              learning_rate=0.0003)
    reloaded_model.restore()

    pred_mols = ["CCCC", "CCCCCO", "CCCCC"]
    X_pred = featurizer(pred_mols)
    random_dataset = dc.data.NumpyDataset(X_pred)
    original_pred = model.predict(random_dataset)
    reload_pred = reloaded_model.predict(random_dataset)
    assert np.all(original_pred == reload_pred)
Example #9
def test_attentivefp_regression():
  # load datasets
  featurizer = MolGraphConvFeaturizer(use_edges=True)
  tasks, dataset, transformers, metric = get_dataset(
      'regression', featurizer=featurizer)

  # initialize models
  n_tasks = len(tasks)
  model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean_absolute_error'] < 0.5

  # test on a small MoleculeNet dataset
  from deepchem.molnet import load_delaney

  tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
  train_set, _, _ = all_dataset
  model = AttentiveFPModel(
      mode='regression',
      n_tasks=len(tasks),
      num_layers=1,
      num_timesteps=1,
      graph_feat_size=2)
  model.fit(train_set, nb_epoch=1)
Example #10
    def test_featurizer_with_self_loop(self):
        smiles = ["C1=CC=CN=C1", "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"]
        featurizer = MolGraphConvFeaturizer(add_self_edges=True)
        graph_feat = featurizer.featurize(smiles)
        assert len(graph_feat) == 2

        # assert "C1=CC=CN=C1"
        assert graph_feat[0].num_nodes == 6
        assert graph_feat[0].num_node_features == 39
        assert graph_feat[0].num_edges == 12 + 6
        assert graph_feat[0].num_edge_features == 11

        # assert "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"
        assert graph_feat[1].num_nodes == 22
        assert graph_feat[1].num_node_features == 39
        assert graph_feat[1].num_edges == 44 + 22
        assert graph_feat[1].num_edge_features == 11
Example #11
def test_attentivefp_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #12
def test_gat_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #13
def test_attentivefp_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='classification',
                             n_tasks=n_tasks,
                             batch_size=10,
                             learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #14
def test_gcn_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
Example #15
def load_dataset(args):
  splitter = 'scaffold'

  if args['featurizer'] == 'ECFP':
    featurizer = 'ECFP'
  elif args['featurizer'] == 'GC':
    from deepchem.feat import MolGraphConvFeaturizer
    featurizer = MolGraphConvFeaturizer()
  else:
    raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

  if args['dataset'] == 'BACE_classification':
    from deepchem.molnet import load_bace_classification
    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'BBBP':
    from deepchem.molnet import load_bbbp
    tasks, all_dataset, transformers = load_bbbp(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'BACE_regression':
    from deepchem.molnet import load_bace_regression
    tasks, all_dataset, transformers = load_bace_regression(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'ClinTox':
    from deepchem.molnet import load_clintox
    tasks, all_dataset, transformers = load_clintox(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'Delaney':
    from deepchem.molnet import load_delaney
    tasks, all_dataset, transformers = load_delaney(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'HOPV':
    from deepchem.molnet import load_hopv
    tasks, all_dataset, transformers = load_hopv(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'SIDER':
    from deepchem.molnet import load_sider
    tasks, all_dataset, transformers = load_sider(
        featurizer=featurizer, splitter=splitter, reload=False)
  elif args['dataset'] == 'Lipo':
    from deepchem.molnet import load_lipo
    tasks, all_dataset, transformers = load_lipo(
        featurizer=featurizer, splitter=splitter, reload=False)
  else:
    raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

  return args, tasks, all_dataset, transformers
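A short usage sketch for the helper above, assuming args is a plain dict (in the original script it presumably comes from argparse):

# Hypothetical driver code for load_dataset; the 'GC' featurizer maps to
# MolGraphConvFeaturizer in the branches above.
args = {'featurizer': 'GC', 'dataset': 'Delaney'}
args, tasks, all_dataset, transformers = load_dataset(args)
# MoleculeNet loaders return a (train, valid, test) split.
train_set, valid_set, test_set = all_dataset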
Example #16
def test_gcn_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.0003)

    # overfit test
    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #17
def test_gat_classification():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=150)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
Example #18
def test_gat_regression():
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(n_tasks=n_tasks,
                     loss=losses.L2Loss(),
                     batch_size=4,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # TODO: check whether this assertion threshold is correct
    assert scores['mean_absolute_error'] < 1.0
Example #19
def load_dataset(args):
    splitter = 'scaffold'

    if args['featurizer'] == 'ECFP':
        featurizer = 'ECFP'
    elif args['featurizer'] == 'GC':
        from deepchem.feat import MolGraphConvFeaturizer
        featurizer = MolGraphConvFeaturizer()
    elif args['featurizer'] == 'AC':
        from deepchem.feat import AtomicConvFeaturizer
        featurizer = AtomicConvFeaturizer(frag1_num_atoms=100,
                                          frag2_num_atoms=1000,
                                          complex_num_atoms=1100,
                                          max_num_neighbors=12,
                                          neighbor_cutoff=4)
    else:
        raise ValueError('Unexpected featurizer: {}'.format(args['featurizer']))

    if args['dataset'] == 'BACE_classification':
        from deepchem.molnet import load_bace_classification
        tasks, all_dataset, transformers = load_bace_classification(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'BBBP':
        from deepchem.molnet import load_bbbp
        tasks, all_dataset, transformers = load_bbbp(featurizer=featurizer,
                                                     splitter=splitter,
                                                     reload=False)
    elif args['dataset'] == 'BACE_regression':
        from deepchem.molnet import load_bace_regression
        tasks, all_dataset, transformers = load_bace_regression(
            featurizer=featurizer, splitter=splitter, reload=False)
    elif args['dataset'] == 'ClinTox':
        from deepchem.molnet import load_clintox
        tasks, all_dataset, transformers = load_clintox(featurizer=featurizer,
                                                        splitter=splitter,
                                                        reload=False)
    elif args['dataset'] == 'Delaney':
        from deepchem.molnet import load_delaney
        tasks, all_dataset, transformers = load_delaney(featurizer=featurizer,
                                                        splitter=splitter,
                                                        reload=False)
    elif args['dataset'] == 'HOPV':
        from deepchem.molnet import load_hopv
        tasks, all_dataset, transformers = load_hopv(featurizer=featurizer,
                                                     splitter=splitter,
                                                     reload=False)
    elif args['dataset'] == 'SIDER':
        from deepchem.molnet import load_sider
        tasks, all_dataset, transformers = load_sider(featurizer=featurizer,
                                                      splitter=splitter,
                                                      reload=False)
    elif args['dataset'] == 'Lipo':
        from deepchem.molnet import load_lipo
        tasks, all_dataset, transformers = load_lipo(featurizer=featurizer,
                                                     splitter=splitter,
                                                     reload=False)
    elif args['dataset'] == 'PDBbind':
        from deepchem.molnet import load_pdbbind
        tasks, all_dataset, transformers = load_pdbbind(
            featurizer=featurizer,
            save_dir='.',
            data_dir='.',
            splitter='random',
            pocket=True,
            set_name='core',  # refined
            reload=False)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    return args, tasks, all_dataset, transformers