Example #1
def gbjjInnerLoop(learner):
    """
    @param learner:
    @return: cv score of the learner
    """
    global trainX, trainY, lf, n_jobs, cvObj

    l = clone(learner)  # work on a copy so the shared learner object is not mutated
    scores = jjcross_val_score(l, trainX, trainY, score_func=lf, n_jobs=n_jobs, cv=cvObj)
    return scores.mean()
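
The initializer gbjjInit is not shown on this page. A minimal sketch of what it must do, inferred from the globals read above and the initargs passed to MyPool in Example #2, would be the following (parameter names are hypothetical):

def gbjjInit(aTrainX, aTrainY, aLossFunction, aN_jobs, aCvObj):
    # hypothetical reconstruction: copy the pool's initargs into the
    # module-level globals that gbjjInnerLoop reads in each worker process
    global trainX, trainY, lf, n_jobs, cvObj
    trainX, trainY, lf, n_jobs, cvObj = aTrainX, aTrainY, aLossFunction, aN_jobs, aCvObj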
Example #2
    def _fit_stage(self, X, y, rmTolerance):
        """
        fits one stage of gradient boosting
        @param X:
        @param y:
        @param rmTolerance: tolerance for 1D optimization
        @return: nothing
        """

        residuals = self.lossFunction.negative_gradient(y, self._currentPrediction)
        # stochastic boosting: train each stage on only a fraction (self.subsample) of the data
        trainX, trainY, _, _ = splitTrainTest(X, residuals, 1 - self.subsample)

        if len(np.unique(trainY)) == 1:
            # degenerate case: all residuals are identical, so skip cross-validation
            hm = MajorityPredictor().fit(trainY)
        else:
            cvObj = KFold(n=len(trainX), n_folds=self.cvNumFolds, indices=False, shuffle=True, random_state=self.randomState)

            # find the h that best mimics the negative gradient
            if self.n_jobs > 1:  # parallel: score the candidate learners across a process pool
                # number of jobs each worker passes to its own cross-validation call (via gbjjInit)
                n_jobs = max(1, self.n_jobs / len(self.learners), self.cvNumFolds)
                pool = MyPool(processes=self.n_jobs, initializer=gbjjInit, initargs=(trainX, trainY, self.lossFunction, n_jobs, cvObj))
                temp = pool.map_async(gbjjInnerLoop, self.learners)
                temp.wait()
                h_res = temp.get()
                pool.close()
                pool.join()

            else:   # single thread
                h_res = []

                for learner in self.learners:
                    if self.verbosity >= 2:
                        print 'Fitting learner:', learner
                    l = clone(learner)
                    scores = jjcross_val_score(l, trainX, trainY, score_func=self.lossFunction, n_jobs=1, cv=cvObj)
                    h_res.append(scores.mean())

            # the candidate with the lowest mean CV loss wins
            hm = clone(self.learners[np.argmin(h_res)])

        if self.verbosity >= 1:
            print "The best classifier is", hm.__class__

        # fit the chosen learner, then find its stage weight rm by 1D minimization of the loss
        hm.fit(trainX, trainY)
        hmx = hm.predict(X)
        rm = minimize_scalar(lambda r: self.lossFunction(y, self._currentPrediction + r * hmx), tol=rmTolerance).x

        # append estimator and weight
        self._estimators.append((hm, rm))
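
The final step above is the classic gradient-boosting line search: the stage weight rm minimizes the loss of the current prediction plus r times the new learner's output. A self-contained toy version with squared-error loss, using made-up data purely for illustration:

import numpy as np
from scipy.optimize import minimize_scalar

# made-up toy data: targets, the current ensemble prediction F_{m-1}(x),
# and the new stage's predictions hm(x)
y = np.array([1.0, 2.0, 3.0])
currentPrediction = np.array([0.5, 1.5, 2.5])
hmx = np.array([1.0, 1.0, 1.0])

# squared-error loss along the direction hmx, minimized over the scalar weight r
loss = lambda r: np.mean((y - (currentPrediction + r * hmx)) ** 2)
rm = minimize_scalar(loss, tol=1e-6).x
print(rm)  # ~0.5, since y - currentPrediction == 0.5 everywhere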