import numpy as np
import sklearn.datasets
import xgboost

import deepchem as dc


# NOTE: originally a method on a DeepChem test class, hence `self`.
def test_xgboost_classification(self):
        """Test that XGBoost models can learn on a simple classification dataset."""
        np.random.seed(123)
        dataset = sklearn.datasets.load_digits(n_class=2)
        X, y = dataset.data, dataset.target

        frac_train = .7
        n_samples = len(X)
        n_train = int(frac_train * n_samples)
        X_train, y_train = X[:n_train], y[:n_train]
        X_test, y_test = X[n_train:], y[n_train:]
        train_dataset = dc.data.NumpyDataset(X_train, y_train)
        test_dataset = dc.data.NumpyDataset(X_test, y_test)

        classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
        esr = {'early_stopping_rounds': 50}
        xgb_model = xgboost.XGBClassifier(n_estimators=50, seed=123)
        model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)

        # Fit the model
        model.fit(train_dataset)
        model.save()

        # Eval model on test
        scores = model.evaluate(test_dataset, [classification_metric])
        assert scores[classification_metric.name] > .9
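
For context, the `early_stopping_rounds` entry forwarded through `**esr` corresponds to xgboost's own early-stopping support. A minimal bare-xgboost sketch of the same idea, assuming an older xgboost release (pre-1.6) where `fit()` still accepts `early_stopping_rounds` directly:

import sklearn.datasets
import xgboost
from sklearn.model_selection import train_test_split

# Same two-class digits task as the test above.
X, y = sklearn.datasets.load_digits(n_class=2, return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.3, random_state=123)

clf = xgboost.XGBClassifier(n_estimators=50, random_state=123)
# Stop adding trees once the validation metric has not improved for 50
# consecutive rounds; newer xgboost releases take early_stopping_rounds
# in the constructor instead of fit().
clf.fit(X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=50,
        verbose=False)
print(clf.score(X_valid, y_valid))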
Example #2
import deepchem.models.xgboost_models
import xgboost


def model_builder(model_dir_xgb):
    # The hyperparameters below (max_depth, learning_rate, ..., seed) and
    # the early-stopping kwargs in `esr` are free variables captured from
    # the enclosing scope, e.g. one draw of a hyperparameter search.
    xgboost_model = xgboost.XGBClassifier(
        max_depth=max_depth,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        gamma=gamma,
        min_child_weight=min_child_weight,
        max_delta_step=max_delta_step,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        colsample_bylevel=colsample_bylevel,
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        scale_pos_weight=scale_pos_weight,
        base_score=base_score,
        seed=seed)
    return deepchem.models.xgboost_models.XGBoostModel(
        xgboost_model, model_dir_xgb, **esr)
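
A minimal sketch of how this builder might be invoked, assuming it closes over hyperparameter values chosen by an outer search loop; every concrete value below is illustrative, not taken from the original:

import tempfile

# Hypothetical hyperparameter draw captured by model_builder's closure.
max_depth, learning_rate, n_estimators = 5, 0.05, 3000
gamma, min_child_weight, max_delta_step = 0.0, 1, 0
subsample, colsample_bytree, colsample_bylevel = 1.0, 1.0, 1.0
reg_alpha, reg_lambda, scale_pos_weight = 0.0, 1.0, 1.0
base_score, seed = 0.5, 123
esr = {'early_stopping_rounds': 50}

model = model_builder(tempfile.mkdtemp())  # a DeepChem XGBoostModel
model.fit(train_dataset)                   # e.g. train_dataset from the test above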