Example #1
0
def fixture_learners_data(breast_cancer_data, boston_data,
                          boston_survival_data):
    """Build a list of (fitted model, X data, predictions on X) triples.

    Covers four quickly-trained (10 estimators) models: a classifier,
    a regressor, a survival model, and a bivariate-normal regressor.
    """
    fitted = []

    X_cls, _, y_cls, _ = breast_cancer_data
    clf = NGBClassifier(verbose=False, n_estimators=10)
    clf.fit(X_cls, y_cls)
    fitted.append((clf, X_cls, clf.predict(X_cls)))

    X_reg, _, y_reg, _ = boston_data
    reg = NGBRegressor(verbose=False, n_estimators=10)
    reg.fit(X_reg, y_reg)
    fitted.append((reg, X_reg, reg.predict(X_reg)))

    X_surv, _, T_surv, E_surv, _ = boston_survival_data
    surv = NGBSurvival(verbose=False, n_estimators=10)
    surv.fit(X_surv, T_surv, E_surv)
    fitted.append((surv, X_surv, surv.predict(X_surv)))

    # Reuse the survival data as a 2-column target for MultivariateNormal.
    mvn = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
    mvn.fit(X_surv, np.vstack([T_surv, E_surv]).T)
    fitted.append((mvn, X_surv, mvn.predict(X_surv)))
    return fitted
Example #2
0
def test_classification():
    """End-to-end check of NGBClassifier on the breast-cancer dataset.

    Verifies hard-label AUC, probability log loss, the scorer method,
    the predictive-distribution type, and probability-based AUC.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    # BUG FIX: `return_X_y` must be passed by keyword -- the positional
    # boolean was deprecated in scikit-learn 0.23 and removed in 1.2,
    # where load_breast_cancer(True) raises a TypeError.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # AUC computed from hard label predictions.
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    # Log loss from the predicted class probabilities.
    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    # score() is held to the same threshold as the log loss above.
    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    # AUC from the positive-class probability column.
    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95
Example #3
0
def test_classification(breast_cancer_data):
    """Smoke-test NGBClassifier end to end on the breast-cancer split."""
    from sklearn.metrics import roc_auc_score, log_loss

    x_tr, x_te, y_tr, y_te = breast_cancer_data
    model = NGBClassifier(Dist=Bernoulli, verbose=False)
    model.fit(x_tr, y_tr)

    # Hard-label AUC; loose score requirement so it isn't failing all the time.
    labels = model.predict(x_te)
    assert roc_auc_score(y_te, labels) >= 0.85

    # Probability predictions: log loss and the built-in scorer.
    probs = model.predict_proba(x_te)
    assert log_loss(y_te, probs) <= 0.30
    assert model.score(x_te, y_te) <= 0.30

    # The predictive distribution should be the configured Bernoulli.
    assert isinstance(model.pred_dist(x_te), Bernoulli)

    # AUC from the positive-class probability column (same loose bound).
    assert roc_auc_score(y_te, probs[:, 1]) >= 0.85
Example #4
0
class myNGBoostClassifier:
    """Thin wrapper around NGBClassifier that accepts pandas input."""

    def make(self, params):
        """Instantiate the underlying NGBClassifier from a kwargs dict.

        Returns self so calls can be chained (``clf.make(p).fit(...)``).
        """
        self.model = NGBClassifier(**params)
        return self

    def fit(self, xtrain, ytrain, xtest=None, ytest=None, fit_params=None):
        """Fit the model, optionally with a validation set.

        DataFrame/Series inputs are converted to raw numpy arrays via
        ``.values`` before being handed to NGBoost.
        """
        # BUG FIX: a mutable default argument ({}) is shared across calls;
        # use None and create a fresh dict per call instead.
        if fit_params is None:
            fit_params = {}
        # BUG FIX: use isinstance / `is None` rather than exact-type
        # comparisons (`type(x) == ...`, `type(x) != type(None)`).
        if isinstance(xtrain, pd.DataFrame):
            xtrain = xtrain.values
            ytrain = ytrain.values
            if xtest is not None and ytest is not None:
                xtest = xtest.values
                ytest = ytest.values
        if xtest is None or ytest is None:
            self.model.fit(xtrain, ytrain, **fit_params)
        else:
            self.model.fit(xtrain, ytrain, X_val=xtest, Y_val=ytest, **fit_params)

    def predict(self, xs):
        """Predict class labels with the wrapped model."""
        return self.model.predict(xs)

    def predict_proba(self, xs):
        """Predict class probabilities; promote a 1-D sample to 2-D."""
        if len(xs.shape) == 1:
            return self.model.predict_proba(xs.reshape(1, -1))
        return self.model.predict_proba(xs)
Example #5
0
def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
    """Fit a Bernoulli/LogScore NGBClassifier and exercise its predict APIs."""
    # test early stopping features
    # test other args, n_trees, LR, minibatching- args as fixture
    X_tr, X_te, y_tr, _unused = breast_cancer_data
    model = NGBClassifier(Dist=Bernoulli, Score=LogScore, Base=learner, verbose=False)
    model.fit(X_tr, y_tr)
    _labels = model.predict(X_te)
    _probs = model.predict_proba(X_te)
    _dist = model.pred_dist(X_te)
Example #6
0
def test_categorical(k: int, learner, breast_cancer_data: Tuple4Array):
    """Fit a k-categorical NGBClassifier on random labels and exercise its predict APIs."""
    X_tr, X_te, y_tr, _unused = breast_cancer_data
    cat_dist = k_categorical(k)
    # Random labels in [0, k); only the API is smoke-tested here.
    labels = np.random.randint(0, k, (len(y_tr)))
    # test early stopping features
    model = NGBClassifier(Dist=cat_dist, Score=LogScore, Base=learner, verbose=False)
    model.fit(X_tr, labels)
    _pred = model.predict(X_te)
    _prob = model.predict_proba(X_te)
    _dist = model.pred_dist(X_te)
Example #7
0
	def test_bernoulli(self, learners, cls_data):
		"""For each base learner, fit a Bernoulli NGBClassifier and exercise its predict APIs."""
		X_tr, X_te, y_tr, _unused = cls_data
		for base in learners:
			# test early stopping features
			# test other args, n_trees, LR, minibatching- args as fixture
			model = NGBClassifier(Dist=Bernoulli, Score=LogScore, Base=base, verbose=False)
			model.fit(X_tr, y_tr)
			_labels = model.predict(X_te)
			_probs = model.predict_proba(X_te)
			_dist = model.pred_dist(X_te)
Example #8
0
	def test_categorical(self, learners, cls_data):
		"""Fit k-categorical NGBClassifiers (k = 2, 4, 7) for every base learner."""
		X_tr, X_te, y_tr, _unused = cls_data
		for num_classes in [2, 4, 7]:
			cat_dist = k_categorical(num_classes)
			# Relabel with random classes in [0, num_classes); length is preserved.
			y_tr = np.random.randint(0, num_classes, (len(y_tr)))

			for base in learners:
				# test early stopping features
				model = NGBClassifier(Dist=cat_dist, Score=LogScore, Base=base, verbose=False)
				model.fit(X_tr, y_tr)
				_labels = model.predict(X_te)
				_probs = model.predict_proba(X_te)
				_dist = model.pred_dist(X_te)
Example #9
0
    def NGBoost(self, args):  ## Natural gradient Boosting
        """Fit an NGBoost classifier or regressor and store its scores.

        Args:
            args: parsed arguments; ``args.predictor`` selects
                'classifier' or 'regressor' (case-insensitive).

        Returns:
            self, with ``self.y_pred``, ``self.data['boosting_score']``
            and ``self.model`` populated.

        Raises:
            ValueError: if ``args.predictor`` is neither 'classifier'
                nor 'regressor'.
        """
        logger.info("Running Natural Gradient Boosting ... ")
        ## https://stanfordmlgroup.github.io/ngboost/1-useage.html
        from ngboost.learners import default_tree_learner
        from ngboost.distns import k_categorical, Bernoulli  ##Classifier
        from ngboost.distns import Exponential, Normal, LogNormal  ## Regressor
        from ngboost.scores import MLE, LogScore, CRPScore

        ## Base Learner
        from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

        # NGBoost
        ## Comment: If error with singular matrix, increase size of input data from 0.05 to 0.15
        predictor = args.predictor.lower()
        if predictor == 'classifier':
            from ngboost import NGBClassifier
            # NGBoost boosts distribution parameters, so the base learner
            # is a *regressor* even for classification.
            base = DecisionTreeRegressor(criterion='friedman_mse',
                                         max_depth=6,
                                         random_state=SEED)
            ngb = NGBClassifier(Base=base,
                                n_estimators=2000,
                                Score=MLE,
                                Dist=Bernoulli,
                                random_state=SEED)

        elif predictor == 'regressor':
            from ngboost import NGBRegressor
            # BUG FIX: a DecisionTreeRegressor used to be constructed here
            # but was never passed to the model; the default tree learner
            # is what is actually used, so the dead assignment is removed.
            ngb = NGBRegressor(Base=default_tree_learner,
                               Dist=Exponential,
                               Score=LogScore,
                               learning_rate=0.01,
                               minibatch_frac=0.6,
                               col_sample=0.6)

        else:
            # Fail fast with a clear message instead of a NameError on
            # `ngb.fit` below.
            raise ValueError(f"Unknown predictor: {args.predictor!r}")

        ## Fit model
        ngb.fit(self.X_train, np.asarray(self.y_train).astype(int))

        ## Predict the labels
        self.y_pred = ngb.predict(self.X_data)

        if predictor == 'regressor':
            # Squash raw regression output into (0, 1) via the logistic CDF.
            self.y_pred = logistic.cdf(self.y_pred)

        self.data['boosting_score'] = self.y_pred
        self.model = ngb
        return self
Example #10
0
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

from ngboost import NGBClassifier
from ngboost.distns import k_categorical

if __name__ == "__main__":

    # BUG FIX: `return_X_y` must be passed by keyword -- the positional
    # boolean was deprecated in scikit-learn 0.23 and removed in 1.2,
    # where load_breast_cancer(True) raises a TypeError.
    X, y = load_breast_cancer(return_X_y=True)
    y[0:
      15] = 2  # artificially make this a 3-class problem instead of a 2-class problem
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

    ngb = NGBClassifier(Dist=k_categorical(
        3))  # tell ngboost that there are 3 possible outcomes
    ngb.fit(X_train, Y_train)  # Y should have only 3 values: {0,1,2}

    # predicted probabilities of class 0, 1, and 2 (columns) for each observation (row)
    preds_proba = ngb.predict_proba(X_test)
    preds = ngb.predict(X_test)

    print(confusion_matrix(Y_test, preds))
    print(classification_report(Y_test, preds))