def test_mp_metallicity_loader():
    """Check the splits and features returned by ``load_mp_metallicity``.

    The loader is called with ``reload=False``, the directory of this test
    file as ``data_dir``, ``max_atoms=8`` for the featurizer and a
    0.6 / 0.2 / 0.2 split with seed 42. The test then verifies the task
    name, the shape of each split's feature matrix, and the first training
    feature vector (absolute tolerance 0.01).

    ``mp_is_metal.json`` is removed from the test directory afterwards if it
    exists, even when the loader or one of the assertions fails.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(current_dir, 'mp_is_metal.json')

    try:
        tasks, datasets, transformers = load_mp_metallicity(
            reload=False,
            data_dir=current_dir,
            featurizer_kwargs={'max_atoms': 8},
            splitter_kwargs={
                'seed': 42,
                'frac_train': 0.6,
                'frac_valid': 0.2,
                'frac_test': 0.2
            })

        assert tasks[0] == 'is_metal'
        assert datasets[0].X.shape == (3, 8)
        assert datasets[1].X.shape == (1, 8)
        assert datasets[2].X.shape == (1, 8)
        assert np.allclose(datasets[0].X[0], [
            0.80428488, -0.70720997, 1.29101261, 0.61631094, 0.84184489,
            -0.28273997, -1.10252907, -1.23500371
        ],
                           atol=0.01)
    finally:
        # Run the cleanup in ``finally`` so a failing assertion does not
        # leave the data file behind in the test directory.
        if os.path.exists(data_file):
            os.remove(data_file)
# Example no. 2
# 0
def test_cgcnn_reload():
  """Train a classification ``CGCNNModel``, then restore it from disk.

  The model is fitted on the training split returned by
  ``load_mp_metallicity``, checked for prediction shapes and for a training
  ROC-AUC above 0.8, and then a second model built on the same ``model_dir``
  is restored and must produce identical predictions on the test split.

  The model directory is temporary and is deleted when the test finishes.
  ``mp_is_metal.json`` is removed from the directory of this test file
  afterwards if it exists, even when the test fails.
  """
  current_dir = path.dirname(path.abspath(__file__))
  # NOTE(review): the cleanup target lives in ``current_dir`` while the
  # loader's ``data_dir`` is ``current_dir/assets`` -- kept as in the
  # original; confirm which location is intended.
  data_file = path.join(current_dir, 'mp_is_metal.json')
  config = {
      "reload": False,
      "featurizer": CGCNNFeaturizer(),
      # disable transformer
      "transformers": [],
      "data_dir": path.join(current_dir, "assets")
  }

  try:
    # load datasets
    tasks, datasets, transformers = load_mp_metallicity(**config)
    train, valid, test = datasets

    n_tasks = len(tasks)
    n_classes = 2

    # A context-managed temporary directory is removed on exit, unlike
    # ``tempfile.mkdtemp()`` which leaves the directory behind.
    with tempfile.TemporaryDirectory() as model_dir:
      model = CGCNNModel(
          n_tasks=n_tasks,
          n_classes=n_classes,
          mode='classification',
          model_dir=model_dir,
          batch_size=4,
          learning_rate=0.001)

      # check train
      model.fit(train, nb_epoch=20)

      # check predict shape
      valid_preds = model.predict_on_batch(valid.X)
      assert valid_preds.shape == (2, n_classes)
      test_preds = model.predict(test)
      assert test_preds.shape == (3, n_classes)

      # check overfit
      classification_metric = Metric(roc_auc_score, n_tasks=n_tasks)
      scores = model.evaluate(
          train, [classification_metric], transformers, n_classes=n_classes)
      assert scores[classification_metric.name] > 0.8

      # reload: a fresh model pointing at the same directory must reproduce
      # the trained model's predictions after ``restore()``
      reloaded_model = CGCNNModel(
          n_tasks=n_tasks,
          n_classes=n_classes,
          mode='classification',
          model_dir=model_dir,
          batch_size=4,
          learning_rate=0.001)
      reloaded_model.restore()

      original_pred = model.predict(test)
      reload_pred = reloaded_model.predict(test)
      assert np.all(original_pred == reload_pred)
  finally:
    # Run the cleanup in ``finally`` so a failing assertion does not skip it.
    if path.exists(data_file):
      remove(data_file)
# Example no. 3
# 0
def test_cgcnn():
    """Train ``CGCNNModel`` in regression and in classification mode.

    Regression uses the dataset from ``load_perovskite`` and requires a
    training MAE below 0.6; classification uses ``load_mp_metallicity`` and
    requires a training ROC-AUC above 0.8. Both parts also check the shapes
    of the validation and test predictions.

    ``perovskite.json`` and ``mp_is_metal.json`` are removed from the
    directory of this test file afterwards if they exist, even when the
    test fails.
    """
    current_dir = path.dirname(path.abspath(__file__))
    data_files = [
        path.join(current_dir, 'perovskite.json'),
        path.join(current_dir, 'mp_is_metal.json'),
    ]
    config = {
        "reload": False,
        # NOTE(review): the featurizer is passed as a class, not an
        # instance -- confirm this matches the loader API in use.
        "featurizer": CGCNNFeaturizer,
        # disable transformer
        "transformers": [],
        "data_dir": current_dir
    }

    try:
        # regression test
        # load datasets
        tasks, datasets, transformers = load_perovskite(**config)
        train, valid, test = datasets

        n_tasks = len(tasks)
        model = CGCNNModel(n_tasks=n_tasks,
                           mode='regression',
                           batch_size=4,
                           learning_rate=0.001)

        # check train
        model.fit(train, nb_epoch=20)

        # check predict shape
        valid_preds = model.predict_on_batch(valid.X)
        assert valid_preds.shape == (2, n_tasks)
        test_preds = model.predict(test)
        assert test_preds.shape == (3, n_tasks)

        # check overfit
        regression_metric = Metric(mae_score, n_tasks=n_tasks)
        scores = model.evaluate(train, [regression_metric], transformers)
        assert scores[regression_metric.name] < 0.6

        # classification test
        # load datasets
        tasks, datasets, transformers = load_mp_metallicity(**config)
        train, valid, test = datasets

        n_tasks = len(tasks)
        n_classes = 2
        model = CGCNNModel(n_tasks=n_tasks,
                           n_classes=n_classes,
                           mode='classification',
                           batch_size=4,
                           learning_rate=0.001)

        # check train
        model.fit(train, nb_epoch=20)

        # check predict shape
        valid_preds = model.predict_on_batch(valid.X)
        assert valid_preds.shape == (2, n_classes)
        test_preds = model.predict(test)
        assert test_preds.shape == (3, n_classes)

        # check overfit
        classification_metric = Metric(roc_auc_score, n_tasks=n_tasks)
        scores = model.evaluate(train, [classification_metric],
                                transformers,
                                n_classes=n_classes)
        assert scores[classification_metric.name] > 0.8

        # TODO: Multi task classification test
    finally:
        # Run the cleanup in ``finally`` so a failure in either half of the
        # test does not leave the data files behind.
        for data_file in data_files:
            if path.exists(data_file):
                remove(data_file)