Example #1
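The snippets on this page omit their import statements; a minimal sketch of the shared setup they assume (standard NumPy/DeepChem names, not part of the original listing):

# Assumed shared imports (sketch): the tests reference np, dc and WeaveModel directly.
import numpy as np
import deepchem as dc
from deepchem.models import WeaveModel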
def test_compute_features_on_infinity_distance():
    """Test that WeaveModel correctly transforms WeaveMol objects into tensors with infinite max_pair_distance."""
    featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=None)
    X = featurizer(["C", "CCC"])
    batch_size = 20
    model = WeaveModel(1,
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       learning_rate=0.0005)
    atom_feat, pair_feat, pair_split, atom_split, atom_to_pair = model.compute_features_on_batch(
        X)

    # There are 4 atoms, each of which has 75 atom features
    assert atom_feat.shape == (4, 75)
    # With unlimited pair distance there are 10 pairs, each with 14 pair features
    assert pair_feat.shape == (10, 14)
    # 4 atoms in total
    assert atom_split.shape == (4, )
    assert np.all(atom_split == np.array([0, 1, 1, 1]))
    # 10 pairs in total
    assert pair_split.shape == (10, )
    assert np.all(pair_split == np.array([0, 1, 1, 1, 2, 2, 2, 3, 3, 3]))
    # 10 pairs in total each with start/finish
    assert atom_to_pair.shape == (10, 2)
    assert np.all(
        atom_to_pair == np.array([[0, 0], [1, 1], [1, 2], [1, 3], [2, 1],
                                  [2, 2], [2, 3], [3, 1], [3, 2], [3, 3]]))
Example #2
  def test_weave_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')
    model = WeaveModel(len(tasks), mode='regression')
    model.fit(dataset, nb_epoch=80)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])
Example #3
def test_weave_fit_simple_distance_1():
    featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=1)
    X = featurizer(["C", "CCC"])
    y = np.array([0, 1.])
    dataset = dc.data.NumpyDataset(X, y)

    batch_size = 20
    model = WeaveModel(1,
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       learning_rate=0.0005)
    model.fit(dataset, nb_epoch=200)
    transformers = []
    metric = dc.metrics.Metric(dc.metrics.roc_auc_score,
                               np.mean,
                               mode="classification")
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
Example #4
  def test_weave_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')
    model = WeaveModel(len(tasks), mode='classification')
    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
Example #5
def test_weave_model():
  tasks, dataset, transformers, metric = get_dataset(
      'classification', 'Weave', data_points=10)

  batch_size = 10
  model = WeaveModel(
      len(tasks),
      batch_size=batch_size,
      mode='classification',
      dropouts=0,
      learning_rate=0.0001)
  model.fit(dataset, nb_epoch=250)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.9
Example #6
def test_weave_regression_model():
    import numpy as np
    import tensorflow as tf
    tf.random.set_seed(123)
    np.random.seed(123)
    tasks, dataset, transformers, metric = get_dataset('regression', 'Weave')

    batch_size = 10
    model = WeaveModel(len(tasks),
                       batch_size=batch_size,
                       mode='regression',
                       batch_normalize=False,
                       fully_connected_layer_sizes=[],
                       dropouts=0,
                       learning_rate=0.0005)
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean_absolute_error'] < 0.1
Example #7
  def test_change_loss_function_weave(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave', num_tasks=1)

    batch_size = 50
    model = WeaveModel(
        len(tasks), batch_size=batch_size, mode='regression', use_queue=False)

    model.fit(dataset, nb_epoch=1)
    model.save()

    model2 = TensorGraph.load_from_dir(model.model_dir, restore=False)
    dummy_label = model2.labels[-1]
    dummy_output = model2.outputs[-1]
    loss = ReduceSum(L2Loss(in_layers=[dummy_label, dummy_output]))
    module = model2.create_submodel(loss=loss)
    model2.restore()
    model2.fit(dataset, nb_epoch=1, submodel=module)
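This test (duplicated as Example #14 below) relies on DeepChem's legacy TensorGraph API; a sketch of the extra imports it would need, assuming the old 2.x module layout (these paths were removed in later DeepChem releases):

# Assumed legacy imports (DeepChem 2.x TensorGraph era); module paths are a best guess.
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import L2Loss, ReduceSum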
Example #8
def test_weave_model():
    tasks, dataset, transformers, metric = get_dataset('classification',
                                                       'Weave')

    batch_size = 20
    model = WeaveModel(len(tasks),
                       batch_size=batch_size,
                       mode='classification',
                       fully_connected_layer_sizes=[2000, 1000],
                       batch_normalize=True,
                       batch_normalize_kwargs={
                           "fused": False,
                           "trainable": True,
                           "renorm": True
                       },
                       learning_rate=0.0005)
    model.fit(dataset, nb_epoch=200)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9
Example #9
def test_compute_features_on_distance_1():
  """Test that WeaveModel correctly transforms WeaveMol objects into tensors with finite max_pair_distance."""
  featurizer = dc.feat.WeaveFeaturizer(max_pair_distance=1)
  X = featurizer(["C", "CCC"])
  batch_size = 20
  model = WeaveModel(
      1,
      batch_size=batch_size,
      mode='classification',
      fully_connected_layer_sizes=[2000, 1000],
      batch_normalize=True,
      batch_normalize_kwargs={
          "fused": False,
          "trainable": True,
          "renorm": True
      },
      learning_rate=0.0005)
  atom_feat, pair_feat, pair_split, atom_split, atom_to_pair = model.compute_features_on_batch(
      X)

  # There are 4 atoms, each of which has 75 atom features
  assert atom_feat.shape == (4, 75)
  # There are 8 pairs with distance 1 and 14 pair features. (To see why 8,
  # there's the self pair for "C". For "CCC" there are 7 pairs including self
  # connections and accounting for symmetry.)
  assert pair_feat.shape == (8, 14)
  # 4 atoms in total
  assert atom_split.shape == (4,)
  assert np.all(atom_split == np.array([0, 1, 1, 1]))
  # 8 pairs in total
  assert pair_split.shape == (8,)
  # The center atom is connected to itself and to both neighbors, so it
  # appears three times. The canonical ranking used in MolecularFeaturizer
  # places this central atom last in the ordering.
  assert np.all(pair_split == np.array([0, 1, 1, 2, 2, 3, 3, 3]))
  # 8 pairs in total, each with start/finish
  assert atom_to_pair.shape == (8, 2)
  assert np.all(atom_to_pair == np.array([[0, 0], [1, 1], [1, 3], [2, 2],
                                          [2, 3], [3, 1], [3, 2], [3, 3]]))
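The per-molecule counts behind these assertions can also be checked before batching; a short sketch, assuming the WeaveMol accessors get_atom_features and get_pair_features from DeepChem's featurizer code:

# Sketch: inspect per-molecule shapes straight from the featurizer.
mols = dc.feat.WeaveFeaturizer(max_pair_distance=1)(["C", "CCC"])
for mol in mols:
    # "C" yields (1, 75) atom features and (1, 14) pair features (its self pair);
    # "CCC" yields (3, 75) and (7, 14), giving the 8 total pairs asserted above.
    print(mol.get_atom_features().shape, mol.get_pair_features().shape)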
Example #10
  def test_weave_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = WeaveModel(len(tasks), mode='regression')

    model.fit(dataset, nb_epoch=80)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
Example #11
  def test_weave_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')

    model = WeaveModel(len(tasks), mode='classification')

    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])
Example #12
  def test_weave_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = WeaveModel(len(tasks), mode='regression')

    model.fit(dataset, nb_epoch=80)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])
Example #13
  def test_weave_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')

    model = WeaveModel(len(tasks), mode='classification')

    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])
Example #14
  def test_change_loss_function_weave(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave', num_tasks=1)

    batch_size = 50
    model = WeaveModel(
        len(tasks), batch_size=batch_size, mode='regression', use_queue=False)

    model.fit(dataset, nb_epoch=1)
    model.save()

    model2 = TensorGraph.load_from_dir(model.model_dir, restore=False)
    dummy_label = model2.labels[-1]
    dummy_output = model2.outputs[-1]
    loss = ReduceSum(L2Loss(in_layers=[dummy_label, dummy_output]))
    module = model2.create_submodel(loss=loss)
    model2.restore()
    model2.fit(dataset, nb_epoch=1, submodel=module)
def generate_weave_model():
    batch_size = 64
    model = WeaveModel(1, batch_size=batch_size, learning_rate=1e-3, use_queue=False, mode='regression')
    return model
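The script below refers to train_dataset, valid_dataset and transformers defined earlier in its original file; a plausible setup is sketched here, assuming the Delaney aqueous-solubility loader with the Weave featurizer (an assumption, not taken from the source):

# Assumed dataset setup (sketch): Delaney solubility data featurized for Weave models.
import numpy as np
import deepchem as dc

tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='Weave')
train_dataset, valid_dataset, test_dataset = datasets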
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Do setup required for tf/keras models
n_tasks = 1  #Only solubility to predict
n_atom_feat = 75
n_pair_feat = 14
n_hidden = 10
batch_size = 64
n_graph_feat = 10
nb_epoch = 10
model = WeaveModel(
    n_tasks=n_tasks,
    n_atom_feat=n_atom_feat,
    n_pair_feat=n_pair_feat,
    n_hidden=n_hidden,
    n_graph_feat=n_graph_feat,
    mode="regression",
    batch_size=batch_size,
    model_dir=
    "/home/rod/Dropbox/Quimica/Analysis/ANalisis/Borradores/WeaveModel/"
)  #To prevent overfitting

# Fit trained model
print("Fitting model")
model.fit(train_dataset, nb_epoch=nb_epoch)
model.save()
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")