Example #1
    def test_tf_reload(self):
        """Test that tensorflow models can overfit simple classification datasets."""
        n_samples = 10
        n_features = 3
        n_tasks = 1
        n_classes = 2

        # Generate dummy dataset
        np.random.seed(123)
        ids = np.arange(n_samples)
        X = np.random.rand(n_samples, n_features)
        y = np.random.randint(n_classes, size=(n_samples, n_tasks))
        w = np.ones((n_samples, n_tasks))

        dataset = NumpyDataset(X, y, w, ids)

        verbosity = "high"
        classification_metric = Metric(metrics.accuracy_score,
                                       verbosity=verbosity)

        tensorflow_model = TensorflowMultiTaskClassifier(n_tasks,
                                                         n_features,
                                                         self.model_dir,
                                                         dropouts=[0.],
                                                         verbosity=verbosity)
        model = TensorflowModel(tensorflow_model, self.model_dir)

        # Fit model
        model.fit(dataset)
        model.save()

        # Load trained model
        reloaded_tensorflow_model = TensorflowMultiTaskClassifier(
            n_tasks,
            n_features,
            self.model_dir,
            dropouts=[0.],
            verbosity=verbosity)
        reloaded_model = TensorflowModel(reloaded_tensorflow_model,
                                         self.model_dir)
        reloaded_model.reload()

        # Eval model on train
        transformers = []
        evaluator = Evaluator(reloaded_model,
                              dataset,
                              transformers,
                              verbosity=verbosity)
        scores = evaluator.compute_model_performance([classification_metric])

        assert scores[classification_metric.name] > .6
Example #2
  def test_tf_multitask_classification_overfit(self):
    """Test tf multitask overfits tiny data."""
    n_tasks = 10
    n_samples = 10
    n_features = 3
    n_classes = 2
    
    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.zeros((n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
  
    dataset = NumpyDataset(X, y, w, ids)

    verbosity = "high"
    classification_metric = Metric(metrics.accuracy_score, verbosity=verbosity,
                                   task_averager=np.mean)
    tensorflow_model = TensorflowMultiTaskClassifier(
        n_tasks, n_features, self.model_dir, dropouts=[0.],
        learning_rate=0.0003, weight_init_stddevs=[.1],
        batch_size=n_samples, verbosity=verbosity)
    model = TensorflowModel(tensorflow_model, self.model_dir)

    # Fit model
    model.fit(dataset)
    model.save()

    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])

    assert scores[classification_metric.name] > .9
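
In this example, task_averager=np.mean makes the metric compute accuracy separately for each of the ten tasks and then average the per-task scores. A minimal numpy sketch of that averaging (an illustration, not deepchem's internal implementation):

import numpy as np

def task_averaged_accuracy(y_true, y_pred):
  # y_true, y_pred: (n_samples, n_tasks) arrays of integer class labels.
  per_task = [(y_true[:, t] == y_pred[:, t]).mean()
              for t in range(y_true.shape[1])]
  return np.mean(per_task)  # np.mean plays the role of the task_averager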
Example #3
  def test_tf_skewed_missing_classification_overfit(self):
    """TF, skewed data, few actives

    Test tensorflow models overfit 0/1 datasets with missing data and few
    actives. This is intended to be as close to singletask MUV datasets as
    possible.
    """
    n_samples = 5120
    n_features = 6
    n_tasks = 1
    n_classes = 2
    
    # Generate dummy dataset
    np.random.seed(123)
    p = .002
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.binomial(1, p, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    # Upweight the rare actives so the total weight on actives roughly
    # matches the dataset size, compensating for the .002 class prior.
    y_flat, w_flat = np.squeeze(y), np.squeeze(w)
    y_nonzero = y_flat[w_flat != 0]
    num_nonzero = np.count_nonzero(y_nonzero)
    weight_nonzero = len(y_nonzero) / num_nonzero
    w_flat[y_flat != 0] = weight_nonzero
    w = np.reshape(w_flat, (n_samples, n_tasks))
  
    dataset = NumpyDataset(X, y, w, ids)

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    tensorflow_model = TensorflowMultiTaskClassifier(
        n_tasks, n_features, self.model_dir, dropouts=[0.],
        learning_rate=0.003, weight_init_stddevs=[1.],
        batch_size=n_samples, verbosity=verbosity)
    model = TensorflowModel(tensorflow_model, self.model_dir)

    # Fit model
    model.fit(dataset, nb_epoch=50)
    model.save()

    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])

    assert scores[classification_metric.name] > .8
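
The reweighting block above balances the skew: with n_samples = 5120 and p = .002 the draw contains only about ten actives, so each active receives a weight of len(y_nonzero)/num_nonzero, on the order of 500, making the total weight on actives comparable to the number of inactives. A quick self-contained check of that arithmetic:

import numpy as np

np.random.seed(123)
y = np.random.binomial(1, .002, size=(5120, 1))
num_actives = np.count_nonzero(y)
print(num_actives)           # expect about 5120 * .002 = 10 actives
print(5120 / num_actives)    # per-active weight, roughly 500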
Example #4
  def test_singletask_tf_mlp_ECFP_classification_API(self):
    """Straightforward test of Tensorflow singletask deepchem classification API."""
    n_features = 1024
    featurizer = CircularFingerprint(size=n_features)

    tasks = ["outcome"]
    input_file = os.path.join(self.current_dir, "example_classification.csv")

    loader = DataLoader(tasks=tasks,
                        smiles_field=self.smiles_field,
                        featurizer=featurizer,
                        verbosity="low")
    dataset = loader.featurize(input_file, self.data_dir)

    splitter = ScaffoldSplitter()
    train_dataset, test_dataset = splitter.train_test_split(
        dataset, self.train_dir, self.test_dir)
    
    transformers = [
        NormalizationTransformer(transform_y=True, dataset=train_dataset)]

    for dataset in [train_dataset, test_dataset]:
      for transformer in transformers:
        transformer.transform(dataset)

    classification_metrics = [Metric(metrics.roc_auc_score),
                              Metric(metrics.matthews_corrcoef),
                              Metric(metrics.recall_score),
                              Metric(metrics.accuracy_score)]

    tensorflow_model = TensorflowMultiTaskClassifier(
        len(tasks), n_features, self.model_dir)
    model = TensorflowModel(tensorflow_model, self.model_dir)

    # Fit model
    model.fit(train_dataset)
    model.save()

    # Eval model on train
    evaluator = Evaluator(model, train_dataset, transformers, verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)

    # Eval model on test
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=True)
    _ = evaluator.compute_model_performance(classification_metrics)
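
For reference, the DataLoader here expects a CSV with a SMILES column (named by self.smiles_field) plus one column per task. A hypothetical two-row example_classification.csv, assuming the SMILES column is simply called smiles:

smiles,outcome
CCO,0
c1ccccc1,1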
Example #5
  def test_tf_skewed_classification_overfit(self):
    """Test tensorflow models can overfit 0/1 datasets with few actives."""
    n_samples = 100
    n_features = 3
    n_tasks = 1
    n_classes = 2
    
    # Generate dummy dataset
    np.random.seed(123)
    p = .05
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.binomial(1, p, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
  
    dataset = NumpyDataset(X, y, w, ids)

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    tensorflow_model = TensorflowMultiTaskClassifier(
        n_tasks, n_features, self.model_dir, dropouts=[0.],
        learning_rate=0.003, weight_init_stddevs=[.1],
        batch_size=n_samples, verbosity=verbosity)
    model = TensorflowModel(tensorflow_model, self.model_dir)

    # Fit model
    model.fit(dataset, nb_epoch=100)
    model.save()

    # Eval model on train
    transformers = []
    evaluator = Evaluator(model, dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])

    assert scores[classification_metric.name] > .75
Example #6
import numpy as np

from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.utils.evaluate import Evaluator
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
# Assumption: load_pcba as provided by deepchem.molnet (or the era's
# example script); adjust the import to your deepchem version.
from deepchem.molnet import load_pcba

np.random.seed(123)

verbosity = "high"

pcba_tasks, pcba_datasets, transformers = load_pcba()
(train_dataset, valid_dataset, test_dataset) = pcba_datasets

metric = Metric(metrics.roc_auc_score, np.mean, mode="classification")

n_features = train_dataset.get_data_shape()[0]
model_dir = None
model = TensorflowMultiTaskClassifier(len(pcba_tasks),
                                      n_features,
                                      model_dir,
                                      dropouts=[.25],
                                      learning_rate=0.001,
                                      weight_init_stddevs=[.1],
                                      batch_size=64,
                                      verbosity="high")

# Fit model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model,
                            train_dataset,
                            transformers,
                            verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([metric])

print("Train scores")
Example #7
import os
import shutil

import numpy as np

from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.utils.evaluate import Evaluator
from deepchem.models.tensorflow_models import TensorflowModel
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
# Assumption: load_tox21 as provided by deepchem.molnet (or the era's
# example script); adjust the import and call to your deepchem version.
from deepchem.molnet import load_tox21

# Directory names assumed for illustration.
base_dir = "/tmp/tox21_tf"
data_dir = os.path.join(base_dir, "data")
model_dir = os.path.join(base_dir, "model")
verbosity = "high"

if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)

# Load Tox21 dataset
n_features = 1024
tox21_tasks, tox21_datasets, transformers = load_tox21(data_dir, reload=False)
# Do train/valid split.
train_dataset, valid_dataset = tox21_datasets

# Fit models
classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")

tensorflow_model = TensorflowMultiTaskClassifier(
    len(tox21_tasks), n_features, model_dir, dropouts=[.25],
    learning_rate=0.0003, weight_init_stddevs=[1.],
    batch_size=32, verbosity=verbosity)
model = TensorflowModel(tensorflow_model, model_dir)

# Fit model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model, train_dataset, transformers,
                            verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance(
    [classification_metric])

print("Train scores")
print(train_scores)
Example #8
  def model_builder(model_params, model_dir):
    tensorflow_model = TensorflowMultiTaskClassifier(
        len(tasks), n_features, model_dir, **model_params)
    return TensorflowModel(tensorflow_model, model_dir)
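
A builder with this (model_params, model_dir) signature is the shape deepchem's hyperparameter search expects. A hedged usage sketch, assuming the HyperparamOpt API of the deepchem 1.x era (module path and argument order not verified against this exact version):

from deepchem.hyper import HyperparamOpt  # location assumed for this era

params_dict = {"learning_rate": [0.003, 0.0003],
               "weight_init_stddevs": [[.1], [.02]]}

optimizer = HyperparamOpt(model_builder)
best_model, best_params, all_results = optimizer.hyperparam_search(
    params_dict, train_dataset, valid_dataset, transformers,
    classification_metric)  # positional-arg order assumed from that era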
Example #9
import numpy as np

from deepchem import metrics
from deepchem.metrics import Metric, to_one_hot
from deepchem.utils.evaluate import Evaluator
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.models.tensorflow_models import TensorflowModel
# Assumption: load_pcba as provided by deepchem.molnet (or the era's
# example script); adjust the import to your deepchem version.
from deepchem.molnet import load_pcba

np.random.seed(123)

pcba_tasks, pcba_datasets, transformers = load_pcba()
(train_dataset, valid_dataset) = pcba_datasets

verbosity = "high"
n_features = train_dataset.get_data_shape()[0]
model_dir = None

metric = Metric(metrics.roc_auc_score, np.mean, mode="classification")

model = TensorflowMultiTaskClassifier(
    len(pcba_tasks), n_features, model_dir, dropouts=[.25],
    learning_rate=0.001, weight_init_stddevs=[.1],
    batch_size=64, verbosity="high")

# Fit model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([metric])

print("Train scores")
print(train_scores)

valid_evaluator = Evaluator(model, valid_dataset, transformers, verbosity=verbosity)
valid_scores = valid_evaluator.compute_model_performance([metric])

print("Valid scores")
print(valid_scores)