# Assumes the module-level imports used throughout this file:
# numpy as np, sklearn.datasets, xgboost, and deepchem as dc.
def test_xgboost_classification(self):
  """Test that XGBoost models can learn on simple classification datasets."""
  np.random.seed(123)
  dataset = sklearn.datasets.load_digits(n_class=2)
  X, y = dataset.data, dataset.target
  frac_train = .7
  n_samples = len(X)
  n_train = int(frac_train * n_samples)
  X_train, y_train = X[:n_train], y[:n_train]
  X_test, y_test = X[n_train:], y[n_train:]
  train_dataset = dc.data.NumpyDataset(X_train, y_train)
  test_dataset = dc.data.NumpyDataset(X_test, y_test)
  classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
  esr = {'early_stopping_rounds': 50}
  xgb_model = xgboost.XGBClassifier(n_estimators=50, seed=123)
  model = dc.models.XGBoostModel(xgb_model, verbose=False, **esr)
  # Fit the model and persist it to disk.
  model.fit(train_dataset)
  model.save()
  # Evaluate the fitted model on the held-out test split.
  scores = model.evaluate(test_dataset, [classification_metric])
  assert scores[classification_metric.name] > .9
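# A minimal sketch (not part of the original test) of the direct xgboost call
# that XGBoostModel's early stopping roughly corresponds to. It assumes an
# older xgboost release whose sklearn API still accepts `early_stopping_rounds`
# in fit(); the helper name is hypothetical.
def _xgboost_early_stopping_sketch(X_train, y_train, X_valid, y_valid):
  clf = xgboost.XGBClassifier(n_estimators=50, seed=123)
  # Track performance on the held-out set and stop boosting once it fails
  # to improve for 50 consecutive rounds.
  clf.fit(
      X_train,
      y_train,
      eval_set=[(X_valid, y_valid)],
      early_stopping_rounds=50,
      verbose=False)
  return clf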
def model_builder(model_dir_xgb):
  # The hyperparameters (max_depth, learning_rate, ...) and the
  # early-stopping dict `esr` are expected to be bound in the enclosing
  # scope, e.g. by a hyperparameter search loop.
  xgboost_model = xgboost.XGBClassifier(
      max_depth=max_depth,
      learning_rate=learning_rate,
      n_estimators=n_estimators,
      gamma=gamma,
      min_child_weight=min_child_weight,
      max_delta_step=max_delta_step,
      subsample=subsample,
      colsample_bytree=colsample_bytree,
      colsample_bylevel=colsample_bylevel,
      reg_alpha=reg_alpha,
      reg_lambda=reg_lambda,
      scale_pos_weight=scale_pos_weight,
      base_score=base_score,
      seed=seed)
  return deepchem.models.xgboost_models.XGBoostModel(
      xgboost_model, model_dir_xgb, **esr)
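# A hedged usage sketch (assumption, not from the source file): a wrapper
# that binds one candidate hyperparameter setting so builders like the one
# above can be produced inside a hyperparameter sweep without relying on
# enclosing-scope globals. `make_model_builder` and `params` are
# hypothetical names; the constructor calls match those used above.
def make_model_builder(params, esr):
  def builder(model_dir_xgb):
    # `params` holds keys such as max_depth, learning_rate, n_estimators, ...
    xgboost_model = xgboost.XGBClassifier(**params)
    return deepchem.models.xgboost_models.XGBoostModel(
        xgboost_model, model_dir_xgb, **esr)
  return builder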