def test_graph_conv_regression_model(self):
    """Fit a GraphConv regression model briefly, then verify that saving and
    reloading the model reproduces its evaluation scores.

    One training epoch is enough here: this test checks save/load
    round-tripping, not model accuracy.
    """
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')

    model.fit(dataset, nb_epoch=1)
    scores = model.evaluate(dataset, [metric], transformers)

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    # Bug fix: the post-reload scores were previously discarded, so the test
    # asserted nothing. Require that reloading preserves the scores, matching
    # the sibling tests in this file.
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
  def test_graph_conv_regression_model(self):
    """Train a GraphConv regressor, check its accuracy, and verify that a
    save/load round trip reproduces the evaluation scores."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)

    scores = model.evaluate(dataset, [metric], transformers)
    errors = scores['mean_absolute_error']
    # Every task's mean absolute error must be small after training.
    assert all(err < 0.1 for err in errors)

    model.save()
    reloaded = TensorGraph.load_from_dir(model.model_dir)
    reloaded_scores = reloaded.evaluate(dataset, [metric], transformers)
    # Reloading must not change the evaluation result.
    assert np.allclose(errors, reloaded_scores['mean_absolute_error'])
# Beispiel #3
def generate_graph_conv_model():
    """Train a single-task GraphConv classifier on the DUD-E ACE dataset.

    Featurizes "dude_ace.csv" with ConvMolFeaturizer, fits the model on the
    full dataset, prints its Matthews-correlation score, and returns the
    trained model.

    Returns:
        The fitted GraphConvModel.
    """
    batch_size = 128
    model = GraphConvModel(1,
                           batch_size=batch_size,
                           mode="classification",
                           model_dir="/tmp/mk01/model_dir")

    dataset_file = "dude_ace.csv"
    tasks = ["is_active"]
    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(tasks=tasks,
                               smiles_field="SMILES",
                               featurizer=featurizer)
    dataset = loader.featurize(dataset_file, shard_size=8192)

    # Matthews correlation coefficient averaged across tasks.
    metrics = [
        dc.metrics.Metric(dc.metrics.matthews_corrcoef,
                          np.mean,
                          mode="classification")
    ]

    # Cleanup: removed unused locals (`splitter`, `training_score_list`,
    # `validation_score_list`, `transformers`) — the model is fit and
    # evaluated on the full dataset, no split or transform is applied.
    model.fit(dataset)
    print(model.evaluate(dataset, metrics))
    return model
  def test_graph_conv_model(self):
    """Train a GraphConv classifier, check ROC-AUC, and verify that the
    score survives a save/load round trip."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    model = GraphConvModel(
        len(tasks), batch_size=50, mode='classification')
    model.fit(dataset, nb_epoch=10)

    scores = model.evaluate(dataset, [metric], transformers)
    auc = scores['mean-roc_auc_score']
    assert auc >= 0.9

    model.save()
    restored = TensorGraph.load_from_dir(model.model_dir)
    restored_scores = restored.evaluate(dataset, [metric], transformers)
    # The reloaded model must score identically (within float tolerance).
    assert np.allclose(auc, restored_scores['mean-roc_auc_score'])
  def test_graph_conv_model(self):
    """Classification smoke test for GraphConvModel: fit, score, then
    confirm that persisting the model does not change its evaluation."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    batch_size = 50
    model = GraphConvModel(len(tasks),
                           batch_size=batch_size,
                           mode='classification')
    model.fit(dataset, nb_epoch=10)

    before = model.evaluate(dataset, [metric], transformers)
    assert before['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    after = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(before['mean-roc_auc_score'],
                       after['mean-roc_auc_score'])
  def test_graph_conv_regression_model(self):
    """Regression smoke test: fit a GraphConvModel, require per-task MAE
    below 0.1, and check save/load reproduces scores within tolerance."""
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    model = GraphConvModel(len(tasks), batch_size=50, mode='regression')
    model.fit(dataset, nb_epoch=100)

    scores = model.evaluate(dataset, [metric], transformers)
    for task_error in scores['mean_absolute_error']:
      assert task_error < 0.1

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    # Allow a small relative tolerance on the reloaded scores.
    assert np.allclose(
        scores['mean_absolute_error'],
        scores2['mean_absolute_error'],
        rtol=1e-4)
# Beispiel #7
def test_graph_conv_model():
    """End-to-end GraphConv classification run on a COVID Mpro dataset.

    Featurizes the CSV, performs a random train/valid/test split, fits the
    model, box-plots validation "pos" probability grouped by true label, and
    prints Matthews-correlation and ROC-AUC scores on train and test splits.
    """
    model = GraphConvModel(1,
                           batch_size=2000,
                           mode="classification",
                           model_dir="/tmp/covid/model_dir")

    featurizer = dc.feat.ConvMolFeaturizer()
    loader = dc.data.CSVLoader(tasks=["isHit"],
                               smiles_field="SMILES",
                               featurizer=featurizer)
    dataset = loader.featurize("covid_mpro_combined_data_sources.csv",
                               shard_size=8192)

    # Matthews correlation coefficient, averaged across tasks.
    metrics = [
        dc.metrics.Metric(dc.metrics.matthews_corrcoef,
                          np.mean,
                          mode="classification")
    ]

    splitter = dc.splits.RandomSplitter()
    train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
        dataset)

    model.fit(train_dataset)

    # Per-molecule [neg, pos] class probabilities for the validation split.
    flat_preds = [row.flatten() for row in model.predict(valid_dataset)]
    pred_df = pd.DataFrame(flat_preds, columns=["neg", "pos"])
    pred_df["active"] = [int(label) for label in valid_dataset.y]
    pred_df["SMILES"] = valid_dataset.ids

    sns.boxplot(pred_df.active, pred_df.pos)

    print(model.evaluate(train_dataset, metrics))
    print(model.evaluate(test_dataset, metrics))

    # Repeat the evaluation with ROC-AUC instead of Matthews correlation.
    metrics = [
        dc.metrics.Metric(dc.metrics.roc_auc_score,
                          np.mean,
                          mode="classification")
    ]
    print(model.evaluate(train_dataset, metrics))
    print(model.evaluate(test_dataset, metrics))
# Beispiel #8
    def test_graph_conv_regression_model(self):
        """Fit a GraphConv regressor and require MAE below 0.1 on every task."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'regression', 'GraphConv')

        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               mode='regression')
        model.fit(dataset, nb_epoch=100)

        scores = model.evaluate(dataset, [metric], transformers)
        for error in scores['mean_absolute_error']:
            assert error < 0.1
# Beispiel #9
    def test_graph_conv_model(self):
        """Fit a GraphConv classifier and require mean ROC-AUC >= 0.9."""
        tasks, dataset, transformers, metric = self.get_dataset(
            'classification', 'GraphConv')

        model = GraphConvModel(len(tasks),
                               batch_size=50,
                               mode='classification')
        model.fit(dataset, nb_epoch=10)

        results = model.evaluate(dataset, [metric], transformers)
        assert results['mean-roc_auc_score'] >= 0.9
# Beispiel #10
def test_graph_conv_regression_model():
    """Regression smoke test with batch normalization disabled; the mean
    absolute error must end up below 0.1."""
    tasks, dataset, transformers, metric = get_dataset('regression',
                                                       'GraphConv')

    model = GraphConvModel(len(tasks),
                           batch_size=10,
                           batch_normalize=False,
                           mode='regression')
    model.fit(dataset, nb_epoch=100)

    results = model.evaluate(dataset, [metric], transformers)
    assert results['mean_absolute_error'] < 0.1
from deepchem.molnet import load_delaney

# Load the Delaney (ESOL) aqueous-solubility benchmark, featurized for
# graph convolutions and split by index into train/valid/test.
delaney_tasks, delaney_datasets, transformers = load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Score with mean Pearson R^2 across tasks.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
# Number of features on conv-mols (not used below; kept for reference).
n_feat = 75
# Batch size of models
batch_size = 128
model = GraphConvModel(
    len(delaney_tasks), batch_size=batch_size, mode='regression')

# Fit the model for 20 epochs on the training split.
model.fit(train_dataset, nb_epoch=20)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
# Beispiel #12
start_time = time.time()

# Train for a fixed number of epochs, checkpointing after every epoch and
# recording train loss plus train/valid R^2 for later analysis.
num_epochs = 100
losses_train = []
score_valid = []
score_train = []
for i in range(num_epochs):
    # deterministic=True fixes the batch order, making runs reproducible.
    loss_train = model.fit(train_dataset, nb_epoch=1, deterministic=True)
    ckpt.step.assign_add(1)
    save_path = manager.save()
    print("Saved checkpoint for step {}: {} ".format(int(ckpt.step),
                                                     save_path))
    # NOTE(review): the checkpoint manager's save path is reused as
    # model_dir here — confirm this is the intended model directory.
    model.save_checkpoint(max_checkpoints_to_keep=20, model_dir=save_path)
    #model.restore()
    R2_train = model.evaluate(train_dataset, [metric])['r2_score']
    R2_valid = model.evaluate(valid_dataset, [metric])['r2_score']
    print("Epoch %d loss_train: %f R2_train %f R2_valid: %f  " %
          (i, loss_train, R2_train, R2_valid))

    losses_train.append(loss_train)
    score_valid.append(R2_valid)
    score_train.append(R2_train)

#### Total wall-clock training time.
print("--- %s seconds ---" % (time.time() - start_time))

# Persist the per-epoch metrics for hyperparameter-optimization analysis.
df = pd.DataFrame(list(zip(losses_train, score_train, score_valid)),
                  columns=['train-loss', 'train-R2score', 'valid-R2score'])

df.to_csv("score-hpopt.csv")
# Beispiel #13
# Featurize molecules two ways and print both: 1024-bit circular
# fingerprints, then RDKit descriptors.
# NOTE(review): `smiles` is assumed to be defined earlier in the file — it
# is not visible in this chunk; confirm against the preceding code.
mols = [Chem.MolFromSmiles(smile) for smile in smiles]
feat = dc.feat.CircularFingerprint(size=1024)
arr = feat.featurize(mols)
print(arr)

feat = dc.feat.RDKitDescriptors()
arr = feat.featurize(mols)
print(arr)

# Train a graph-convolution regressor on the Delaney solubility benchmark.
tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = datasets

model = GraphConvModel(n_tasks=1, mode='regression', dropout=0.2)
model.fit(train_dataset, nb_epoch=100)

# Report Pearson R^2 on the train and test splits.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
print(model.evaluate(train_dataset, [metric], transformers))
print(model.evaluate(test_dataset, [metric], transformers))

# Predict solubility for a handful of new molecules.
smiles = ['COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C',
          'CCOC(=O)CC',
          'CSc1nc(NC(C)C)nc(NC(C)C)n1',
          'CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1',
          'Cc1cc2ccccc2cc1C']

from rdkit import Chem
mols = [Chem.MolFromSmiles(s) for s in smiles]
featurizer = dc.feat.ConvMolFeaturizer()
x = featurizer.featurize(mols)

predicted_solubility = model.predict_on_batch(x)