def test_gcn_reload():
    """Train a GCN classifier, reload it from its checkpoint directory, and
    verify that the restored model reproduces the original predictions
    exactly."""
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models; checkpoints go into a fresh temp directory
    n_tasks = len(tasks)
    model_dir = tempfile.mkdtemp()
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     model_dir=model_dir,
                     batch_size=10,
                     learning_rate=0.0003)
    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # build an identically-configured model and restore the saved weights
    reloaded_model = GCNModel(mode='classification',
                              n_tasks=n_tasks,
                              number_atom_features=30,
                              model_dir=model_dir,
                              batch_size=10,
                              learning_rate=0.0003)
    reloaded_model.restore()

    # both models must agree bit-for-bit on unseen molecules
    pred_mols = ["CCCC", "CCCCCO", "CCCCC"]
    X_pred = featurizer(pred_mols)
    random_dataset = dc.data.NumpyDataset(X_pred)
    original_pred = model.predict(random_dataset)
    reload_pred = reloaded_model.predict(random_dataset)
    assert np.all(original_pred == reload_pred)
def test_attentivefp_regression():
    """Overfit an AttentiveFP regressor on a tiny dataset, then smoke-test a
    minimal configuration on the MoleculeNet Delaney dataset for one epoch.

    NOTE(review): a second function with this same name appears later in this
    module; Python keeps only the last definition, so this version is shadowed
    and never collected by pytest — confirm which copy is intended.
    """
    # load datasets
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset(
        'regression', featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = AttentiveFPModel(
        mode='regression',
        n_tasks=len(tasks),
        num_layers=1,
        num_timesteps=1,
        graph_feat_size=2)
    model.fit(train_set, nb_epoch=1)
def test_gat_classification():
    """Overfit a GAT classifier on a tiny dataset, then smoke-test a minimal
    configuration on the MoleculeNet BACE dataset for one epoch.

    NOTE(review): other functions with this same name appear later in this
    module; Python keeps only the last definition, so this version is shadowed
    and never collected by pytest — confirm which copy is intended.
    """
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GATModel(mode='classification',
                               n_tasks=len(tasks),
                               graph_attention_layers=[2],
                               n_attention_heads=1,
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
def test_mpnn_classification():
    """Overfit an MPNN classifier on a tiny dataset, then smoke-test a minimal
    configuration on the MoleculeNet BACE dataset for one epoch."""
    # load datasets; MPNN consumes edge features, so enable them
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='classification',
                      n_tasks=n_tasks,
                      learning_rate=0.0005)

    # overfit test
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='classification',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
def test_gcn_regression():
    """Overfit a GCN regressor on a tiny dataset, then smoke-test a minimal
    configuration on the MoleculeNet Delaney dataset for one epoch.

    NOTE(review): a second function with this same name appears later in this
    module; Python keeps only the last definition, so this version is shadowed
    and never collected by pytest — confirm which copy is intended.
    """
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.003)

    # overfit test
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = dc.models.GCNModel(n_tasks=len(tasks),
                               graph_conv_layers=[2],
                               residual=False,
                               predictor_hidden_feats=2)
    model.fit(train_set, nb_epoch=1)
def test_attentivefp_classification():
    """Overfit an AttentiveFP classifier on a tiny dataset, then smoke-test a
    minimal configuration on the MoleculeNet BACE dataset for one epoch.

    NOTE(review): a second function with this same name appears later in this
    module; Python keeps only the last definition, so this version is shadowed
    and never collected by pytest — confirm which copy is intended.
    """
    # load datasets; AttentiveFP consumes edge features, so enable them
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset(
        'classification', featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(
        mode='classification',
        n_tasks=n_tasks,
        batch_size=10,
        learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = AttentiveFPModel(
        mode='classification',
        n_tasks=len(tasks),
        num_layers=1,
        num_timesteps=1,
        graph_feat_size=2)
    model.fit(train_set, nb_epoch=1)
def test_jax_model_for_regression():
    """Fit a small Haiku MLP through JaxModel on the regression dataset and
    check that the error metric drops below 0.5."""
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer='ECFP')

    # sample network
    def forward_model(x):
        net = hk.nets.MLP([512, 256, 128, 2])
        return net(x)

    def rms_loss(pred, tar, w):
        return jnp.mean(optax.l2_loss(pred, tar))

    # Model Initialization
    params_init, forward_fn = hk.transform(forward_model)
    rng = jax.random.PRNGKey(500)
    inputs, _, _, _ = next(iter(dataset.iterbatches(batch_size=256)))
    # haiku/jax expect float32; downcast any float64 feature arrays
    cast_inputs = jnp.array(
        [x.astype(np.float32) if x.dtype == np.float64 else x for x in inputs])
    params = params_init(rng, cast_inputs)

    # Loss Function
    criterion = rms_loss

    # JaxModel Working
    jax_model = JaxModel(forward_fn,
                         params,
                         criterion,
                         batch_size=256,
                         learning_rate=0.001,
                         log_frequency=2)
    _ = jax_model.fit(dataset, nb_epochs=25, deterministic=True)
    scores = jax_model.evaluate(dataset, [metric])
    assert scores[metric.name] < 0.5
def test_mpnn_regression():
    """Overfit an MPNN regressor on a tiny dataset, then smoke-test a minimal
    configuration on the MoleculeNet Delaney dataset for one epoch."""
    # load datasets; MPNN consumes edge features, so enable them
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = MPNNModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=400)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = MPNNModel(mode='regression',
                      n_tasks=len(tasks),
                      node_out_feats=2,
                      edge_hidden_feats=2,
                      num_step_message_passing=1,
                      num_step_set2set=1,
                      num_layer_set2set=1)
    model.fit(train_set, nb_epoch=1)
def test_pagtn_reload():
    """Train a PAGTN classifier, reload it from its checkpoint directory, and
    verify that the restored model reproduces the original predictions
    exactly."""
    # load datasets
    featurizer = PagtnMolGraphFeaturizer(max_length=5)
    tasks, dataset, transformers, metric = get_dataset(
        'classification', featurizer=featurizer)

    # initialize models; checkpoints go into a fresh temp directory
    n_tasks = len(tasks)
    model_dir = tempfile.mkdtemp()
    model = PagtnModel(
        mode='classification',
        n_tasks=n_tasks,
        model_dir=model_dir,
        batch_size=16)
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # build an identically-configured model and restore the saved weights
    reloaded_model = PagtnModel(
        mode='classification',
        n_tasks=n_tasks,
        model_dir=model_dir,
        batch_size=16)
    reloaded_model.restore()

    # both models must agree bit-for-bit on unseen molecules
    pred_mols = ["CCCC", "CCCCCO", "CCCCC"]
    X_pred = featurizer(pred_mols)
    random_dataset = dc.data.NumpyDataset(X_pred)
    original_pred = model.predict(random_dataset)
    reload_pred = reloaded_model.predict(random_dataset)
    assert np.all(original_pred == reload_pred)
def test_attentivefp_regression():
    """Overfit an AttentiveFP regressor on a tiny dataset.

    NOTE(review): an earlier function in this module has the same name; this
    later definition is the one pytest actually collects. Consider renaming
    one of them so both run.
    """
    # load datasets; AttentiveFP consumes edge features, so enable them
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
def test_gat_regression():
    """Overfit a GAT regressor on a tiny dataset and check the error metric."""
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='regression', n_tasks=n_tasks, batch_size=10)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=300)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
def test_gcn_regression():
    """Overfit a GCN regressor on a tiny dataset.

    NOTE(review): an earlier function in this module has the same name; this
    later definition is the one pytest actually collects. Consider renaming
    one of them so both run.
    """
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='regression',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.5
def test_attentivefp_classification():
    """Overfit an AttentiveFP classifier on a tiny dataset.

    NOTE(review): an earlier function in this module has the same name; this
    later definition is the one pytest actually collects. Consider renaming
    one of them so both run.
    """
    # load datasets; AttentiveFP consumes edge features, so enable them
    featurizer = MolGraphConvFeaturizer(use_edges=True)
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = AttentiveFPModel(mode='classification',
                             n_tasks=n_tasks,
                             batch_size=10,
                             learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
def test_gat_regression_l2loss():
    """Overfit a GAT model on the regression dataset with an explicit L2 loss.

    Renamed from ``test_gat_classification``: the body exercises regression
    (regression dataset, ``losses.L2Loss()``, ``mean_absolute_error``), and the
    old name collided with the genuine GAT classification tests in this module,
    so Python's later definition shadowed this one and pytest never ran it.
    """
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       featurizer=featurizer)

    # initialize models with an explicitly supplied regression loss
    n_tasks = len(tasks)
    model = GATModel(n_tasks=n_tasks,
                     loss=losses.L2Loss(),
                     batch_size=4,
                     learning_rate=0.001)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    # TODO: check this assertion threshold is correct or not
    assert scores['mean_absolute_error'] < 1.0
def test_gat_classification():
    """Overfit a GAT classifier on a tiny dataset and check the ROC-AUC."""
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GATModel(mode='classification',
                     n_tasks=n_tasks,
                     batch_size=10,
                     learning_rate=0.001)

    # overfit test
    # GAT's convergence is a little slow
    model.fit(dataset, nb_epoch=150)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
def test_gcn_classification():
    """Overfit a GCN classifier on a tiny dataset and check the ROC-AUC."""
    # load datasets
    featurizer = MolGraphConvFeaturizer()
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = GCNModel(mode='classification',
                     n_tasks=n_tasks,
                     number_atom_features=30,
                     batch_size=10,
                     learning_rate=0.0003)

    # overfit test
    model.fit(dataset, nb_epoch=70)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85
def test_jax_model_for_classification():
    """Fit a small Haiku MLP classifier through JaxModel and check that the
    classification metric exceeds 0.8."""
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       featurizer='ECFP')

    # sample network: returns both raw logits (for the loss) and softmax
    # probabilities (for prediction)
    class Encoder(hk.Module):

        def __init__(self, output_size: int = 2):
            super().__init__()
            self._network = hk.nets.MLP([512, 256, 128, output_size])

        def __call__(self, x: jnp.ndarray):
            x = self._network(x)
            return x, jax.nn.softmax(x)

    def bce_loss(pred, tar, w):
        # cross-entropy needs float32 targets; cast anything else
        tar = jnp.array([
            x.astype(np.float32) if x.dtype != np.float32 else x for x in tar
        ])
        return jnp.mean(optax.softmax_cross_entropy(pred[0], tar))

    # Model Initilisation
    params_init, forward_fn = hk.transform(lambda x: Encoder()(x))  # noqa
    rng = jax.random.PRNGKey(500)
    inputs, _, _, _ = next(iter(dataset.iterbatches(batch_size=256)))
    # haiku/jax expect float32; downcast any float64 feature arrays
    cast_inputs = jnp.array(
        [x.astype(np.float32) if x.dtype == np.float64 else x for x in inputs])
    params = params_init(rng, cast_inputs)

    # Loss Function
    criterion = bce_loss

    # JaxModel Working
    jax_model = JaxModel(forward_fn,
                         params,
                         criterion,
                         output_types=['loss', 'prediction'],
                         batch_size=256,
                         learning_rate=0.001,
                         log_frequency=2)
    _ = jax_model.fit(dataset, nb_epochs=25, deterministic=True)
    scores = jax_model.evaluate(dataset, [metric])
    assert scores[metric.name] > 0.8
def test_pagtn_regression():
    """Overfit a PAGTN regressor on a tiny dataset, then smoke-test it on the
    MoleculeNet Delaney dataset for one epoch."""
    # load datasets
    featurizer = PagtnMolGraphFeaturizer(max_length=5)
    tasks, dataset, transformers, metric = get_dataset(
        'regression', featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = PagtnModel(mode='regression', n_tasks=n_tasks, batch_size=16)

    # overfit test
    model.fit(dataset, nb_epoch=150)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.65

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_delaney

    tasks, all_dataset, transformers = load_delaney(featurizer=featurizer)
    train_set, _, _ = all_dataset
    # Fix: size the model from the freshly loaded Delaney task list
    # (``len(tasks)``) rather than the stale ``n_tasks`` captured from the
    # first dataset, matching the sibling MoleculeNet smoke tests.
    model = PagtnModel(mode='regression', n_tasks=len(tasks), batch_size=16)
    model.fit(train_set, nb_epoch=1)
def test_pagtn_classification():
    """Overfit a PAGTN classifier on a tiny dataset, then smoke-test it on the
    MoleculeNet BACE dataset for one epoch."""
    # load datasets
    featurizer = PagtnMolGraphFeaturizer(max_length=5)
    tasks, dataset, transformers, metric = get_dataset(
        'classification', featurizer=featurizer)

    # initialize models
    n_tasks = len(tasks)
    model = PagtnModel(mode='classification', n_tasks=n_tasks, batch_size=16)

    # overfit test
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.85

    # test on a small MoleculeNet dataset
    from deepchem.molnet import load_bace_classification

    tasks, all_dataset, transformers = load_bace_classification(
        featurizer=featurizer)
    train_set, _, _ = all_dataset
    model = PagtnModel(mode='classification',
                       n_tasks=len(tasks),
                       batch_size=16)
    model.fit(train_set, nb_epoch=1)