Example 1
    def fit(self, X, y):
        from sklearn.model_selection import StratifiedKFold
        from joblib import Parallel, delayed

        # Stratified folds preserve the class balance in every split.
        skf = StratifiedKFold(n_splits=self.n_folds, shuffle=True)
        train_folds, test_folds = zip(*skf.split(X, y))

        # Fit one clone of each intermediate estimator per training fold.
        self.intermediate_ests = Parallel(n_jobs=self.n_jobs)(
            delayed(kfold_fit)(est, X, y, train_folds)
            for est in self.intermediate_estimators
        )

        # Out-of-fold class probabilities become the meta-features,
        # one block of columns per intermediate estimator.
        probas = np.hstack(Parallel(n_jobs=self.n_jobs)(
            delayed(kfold_predict_proba_est)(est, X, test_folds)
            for est in self.intermediate_ests
        ))

        # Reorder the targets to match the concatenated test folds.
        y_test = np.hstack([y[idx] for idx in test_folds])

        # Train the final (meta) estimator on the out-of-fold features.
        self.final_est = sk_clone(self.final_estimator).fit(probas, y_test)

        return self
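Example 1 calls a kfold_predict_proba_est helper that is not shown on this page. A minimal sketch of what it could look like, assuming each fitted fold estimator exposes predict_proba and per-fold outputs are concatenated row-wise so they line up with y_test (the name and signature come from the call above; the body is an assumption):

def kfold_predict_proba_est(fold_estimators, X, folds):
    # Assumed helper: out-of-fold class probabilities, one fitted
    # estimator per test fold, concatenated row-wise so the rows
    # line up with np.hstack([y[idx] for idx in folds]).
    return np.vstack([
        est.predict_proba(X[idx])
        for est, idx in zip(fold_estimators, folds)
    ])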
Example 2
def test_pickle():
    from sklearn.base import clone as sk_clone

    est = XGBoostClassifier(num_rounds=50,
                            objective='binary:logistic',
                            silent=1)
    # Cloning must preserve the constructor parameters.
    cl = sk_clone(est)
    assert cl.params['num_rounds'] == 50
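For sk_clone to work in this test, the wrapper only has to satisfy scikit-learn's get_params/set_params contract. A hypothetical minimal version of such a wrapper (the real XGBoostClassifier in the source project may differ):

from sklearn.base import BaseEstimator

class XGBoostClassifier(BaseEstimator):
    # Hypothetical minimal wrapper: BaseEstimator derives get_params()
    # from the __init__ signature, so each argument must be stored
    # under its own attribute name for cloning to round-trip.
    def __init__(self, num_rounds=100, objective='binary:logistic', silent=1):
        self.num_rounds = num_rounds
        self.objective = objective
        self.silent = silent

    @property
    def params(self):
        # Expose the constructor arguments as the parameter dict
        # that the test above reads back after cloning.
        return {'num_rounds': self.num_rounds,
                'objective': self.objective,
                'silent': self.silent}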
Example 3
    def fit(self, x, y):
        from sklearn.model_selection import KFold
        from joblib import Parallel, delayed

        kf = KFold(n_splits=self.n_folds, shuffle=True)
        train_folds, test_folds = zip(*kf.split(x))

        # Fit one clone of each intermediate estimator per training fold.
        intermediate_ests = Parallel(n_jobs=self.n_jobs)(
            delayed(kfold_fit)(est, x, y, train_folds)
            for est in self.intermediate_estimators
        )

        # Out-of-fold predictions become the meta-features,
        # one column per intermediate estimator.
        probas = np.vstack(Parallel(n_jobs=self.n_jobs)(
            delayed(kfold_predict_est)(est, x, test_folds)
            for est in intermediate_ests
        )).T

        # Reorder the targets to match the concatenated test folds.
        y_test = np.hstack([y[idx] for idx in test_folds])

        # Train the final (meta) estimator on the out-of-fold features.
        self.final_est = sk_clone(self.final_estimator).fit(probas, y_test)

        # Refit the intermediate estimators on the full data for later use.
        self.intermediate_ests = Parallel(n_jobs=self.n_jobs)(
            delayed(fit_est_clone)(est, x, y)
            for est in self.intermediate_estimators
        )

        return self
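As in Example 1, the kfold_predict_est helper is not shown. A minimal sketch under the same assumptions, returning a single 1-D array of out-of-fold predictions so that np.vstack(...).T above yields one column per intermediate estimator:

def kfold_predict_est(fold_estimators, x, folds):
    # Assumed helper: hard predictions on each test fold, one fitted
    # estimator per fold, concatenated into a single 1-D array.
    return np.hstack([
        est.predict(x[idx])
        for est, idx in zip(fold_estimators, folds)
    ])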
Example 4
def _clone(estimator, safe=True, original=False):
    # XXX: This is a monkey patch to allow cloning of
    #      CalibratedClassifierCV(cv="prefit"), while keeping the original
    #      base_estimator. Do not reproduce at home!
    if hasattr(estimator, "_clone") and not original:
        return estimator._clone()
    else:
        return sk_clone(estimator, safe=safe)
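How the hook is attached is not shown here; a hypothetical sketch, assuming the surrounding code gives the prefit calibrator a _clone method (only the hook name and the original flag come from the snippet above; fitted_model is illustrative):

from sklearn.calibration import CalibratedClassifierCV

# Hypothetical setup: fitted_model is an already-fitted classifier (assumed).
# Giving the prefit calibrator a _clone hook makes the patched _clone()
# above return it unchanged instead of re-cloning (and thereby dropping)
# its fitted base_estimator.
calibrated = CalibratedClassifierCV(base_estimator=fitted_model, cv="prefit")
calibrated._clone = lambda: calibrated

keeps_fit = _clone(calibrated)                  # uses the hook
param_copy = _clone(calibrated, original=True)  # plain sk_clone, unfitted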
Example 5
    def _clone_learner(self, treatment_values):
        """
        Create a copy of the underlying learner object for each of the treatment values.

        Args:
            treatment_values: List of unique treatment values (a single scalar value is also accepted).

        Returns:
            dict[Any, learner]: Dictionary mapping each treatment value (key) to a clone (value) of the
                                learner object that was passed during initialization.
        """
        treatment_values = [treatment_values] if isscalar(treatment_values) else treatment_values
        learners = {treatment_value: sk_clone(self.learner) for treatment_value in treatment_values}
        return learners
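A hypothetical usage sketch, fitting one independent learner per treatment arm (the names model, X, y and the treatment vector t are illustrative):

# Hypothetical usage: one independent clone per treatment value.
learners = model._clone_learner([0, 1])
learners[0].fit(X[t == 0], y[t == 0])
learners[1].fit(X[t == 1], y[t == 1])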
Example 6
def split_test(est, n_tests):
    df = pd.read_csv('train.csv.gz', index_col='Id')
    features = df.drop(['revenue'], axis=1)
    target = df.revenue

    scorer = make_scorer(rmse)
    scores = []
    for _ in range(n_tests):
        # Score a fresh clone on a random 80/20 train/test split.
        m = sk_clone(est)
        xtr, xtst, ytr, ytst = train_test_split(features, target, test_size=.2)
        m.fit(xtr, ytr)
        scores.append(scorer(m, xtst, ytst))

    scores = np.array(scores)
    return {'RMSE-mean': scores.mean(), 'RMSE-std': scores.std()}
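split_test assumes an rmse metric defined elsewhere in the module. A minimal sketch of such a metric (the definition is an assumption; only the name comes from the code above):

import numpy as np

def rmse(y_true, y_pred):
    # Assumed metric passed to make_scorer above. make_scorer's default
    # greater_is_better=True leaves the value un-negated, which is what
    # split_test's positive RMSE report relies on.
    return np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))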
Example 7
    def _instantiate_nearest_neighbors_object(self):
        backend = self.knn_backend
        if backend == "sklearn":
            backend_instance = NearestNeighbors(algorithm="auto")
        elif callable(backend):
            backend_instance = backend()
            self.metric = backend_instance.metric
        elif hasattr(backend, "fit") and hasattr(backend, "kneighbors"):
            backend_instance = sk_clone(backend)
            self.metric = backend_instance.metric
        else:
            raise NotImplementedError(
                "`knn_backend` must be either a NearestNeighbors-like object,"
                " a callable returning such an object, or the string \"sklearn\""
            )
        backend_instance.set_params(**self._get_metric_dict())
        return backend_instance
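A hypothetical sketch of the three accepted knn_backend forms (the owning class name Matcher is illustrative; only the three branches come from the method above):

from sklearn.neighbors import NearestNeighbors

# Hypothetical usage covering the three accepted forms:
m1 = Matcher(knn_backend="sklearn")                                     # the string
m2 = Matcher(knn_backend=lambda: NearestNeighbors(metric="chebyshev"))  # a callable
m3 = Matcher(knn_backend=NearestNeighbors(metric="chebyshev"))          # an instance (cloned)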
Example 8
def fit_clone_with_key(estimator, features, labels, key):
    from sklearn.base import clone as sk_clone
    # Return the key alongside the fitted clone so parallel callers
    # can match results back to their inputs.
    return key, sk_clone(estimator).fit(features, labels)
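A hypothetical usage sketch: fitting a dict of named models in parallel, where the returned key re-associates each fitted clone with its name (models, X_train, y_train are illustrative):

from joblib import Parallel, delayed

# Hypothetical usage: collect (name, fitted_clone) pairs into a dict.
fitted = dict(Parallel(n_jobs=-1)(
    delayed(fit_clone_with_key)(est, X_train, y_train, name)
    for name, est in models.items()
))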
Example 9
def kfold_fit(estimator, X, y, folds):
    # Fit one fresh clone per fold on that fold's subset of the data.
    ests = [
        sk_clone(estimator).fit(X[idx], y[idx])
        for idx in folds
    ]
    return ests
Example 10
def fit_est_clone(estimator, features, labels):
    # Fit and return a fresh clone, leaving the passed estimator untouched.
    return sk_clone(estimator).fit(features, labels)