def fit(self, X, y):
    """Fit a stacking classifier.

    Each intermediate estimator is fitted once per stratified training
    fold (in parallel), its out-of-fold probability predictions are
    stacked column-wise into a new feature matrix, and the final
    (meta) estimator is fitted on those probabilities.

    Returns:
        self, fitted.
    """
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn
    # 0.20 — this code requires an old scikit-learn release
    # (the modern equivalent lives in sklearn.model_selection).
    from sklearn.cross_validation import StratifiedKFold
    # Old-style CV object: iterating yields (train_idx, test_idx) pairs.
    folds = StratifiedKFold(y, n_folds=self.n_folds, shuffle=True)
    train_folds, test_folds = zip(*folds)
    # (callable, args, kwargs) tuples are the raw task form consumed by
    # joblib's Parallel — equivalent to wrapping each call in delayed().
    # Each entry of intermediate_ests is the per-fold list of fitted
    # clones produced by kfold_fit for one intermediate estimator.
    self.intermediate_ests = Parallel(n_jobs=self.n_jobs)(
        (
            (kfold_fit, [est, X, y, train_folds], {})
            for est in self.intermediate_estimators
        )
    )
    # Out-of-fold probability predictions, concatenated horizontally so
    # every intermediate estimator contributes its own feature columns.
    probas = np.hstack(Parallel(n_jobs=self.n_jobs)(
        (
            (kfold_predict_proba_est, [est, X, test_folds], {})
            for est in self.intermediate_ests
        )
    ))
    # Targets reordered to line up with the concatenated test folds.
    y_test = np.hstack([y[idx] for idx in test_folds])
    # Meta-estimator learns on the out-of-fold probabilities.
    self.final_est = sk_clone(self.final_estimator).fit(probas, y_test)
    return self
def test_pickle():
    """Cloning an XGBoostClassifier must preserve its constructor params."""
    from sklearn.base import clone as sk_clone
    original = XGBoostClassifier(
        num_rounds=50, objective='binary:logistic', silent=1
    )
    cloned = sk_clone(original)
    assert cloned.params['num_rounds'] == 50
def fit(self, x, y):
    """Fit a stacking regressor/classifier.

    Intermediate estimators are fitted per K-fold training split (in
    parallel), their out-of-fold predictions become the feature matrix
    for the final (meta) estimator, and each intermediate estimator is
    then refitted on the full data for later prediction.

    Returns:
        self, fitted.
    """
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn
    # 0.20 — this requires an old release (modern API:
    # sklearn.model_selection.KFold, different constructor signature).
    from sklearn.cross_validation import KFold
    # Old-style CV object: iterating yields (train_idx, test_idx) pairs.
    folds = KFold(n=len(y), n_folds=self.n_folds, shuffle=True)
    train_folds, test_folds = zip(*folds)
    # (callable, args, kwargs) tuples are the raw task form consumed by
    # joblib's Parallel — equivalent to wrapping each call in delayed().
    intermediate_ests = Parallel(n_jobs=self.n_jobs)(
        (
            (kfold_fit, [est, x, y, train_folds], {})
            for est in self.intermediate_estimators
        )
    )
    # One row of out-of-fold predictions per intermediate estimator;
    # transpose so samples are rows and estimators are feature columns.
    probas = np.vstack(Parallel(n_jobs=self.n_jobs)(
        (
            (kfold_predict_est, [est, x, test_folds], {})
            for est in intermediate_ests
        )
    )).T
    # Targets reordered to line up with the concatenated test folds.
    y_test = np.hstack([y[idx] for idx in test_folds])
    # Meta-estimator learns on the out-of-fold predictions.
    self.final_est = sk_clone(self.final_estimator).fit(probas, y_test)
    # Unlike the per-fold clones above, the stored intermediate
    # estimators are refitted on the FULL data for use at predict time.
    self.intermediate_ests = Parallel(n_jobs=self.n_jobs)(
        ((fit_est_clone, [est, x, y], {}) for est in self.intermediate_estimators)
    )
    return self
def _clone(estimator, safe=True, original=False): # XXX: This is a monkey patch to allow cloning of # CalibratedClassifierCV(cv="prefit"), while keeping the original # base_estimator. Do not reproduce at home! if hasattr(estimator, "_clone") and not original: return estimator._clone() else: return sk_clone(estimator, safe=safe)
def _clone_learner(self, treatment_values):
    """Create a copy of the underlying learner for each treatment value.

    Args:
        treatment_values: List of unique values of treatment (a single
            scalar value is accepted as well).

    Returns:
        dict[Any, learner]: Dictionary mapping each treatment value to a
            fresh clone of the learner passed during initialization.
    """
    if isscalar(treatment_values):
        treatment_values = [treatment_values]
    return {value: sk_clone(self.learner) for value in treatment_values}
def split_test(est, n_tests):
    """Estimate RMSE of *est* over repeated random train/test splits.

    Loads ``train.csv.gz``, then ``n_tests`` times: clones the
    estimator, fits it on a random 80% split and scores it on the
    remaining 20%.

    Args:
        est: an unfitted sklearn-style estimator (cloned each round).
        n_tests: number of random splits to evaluate.

    Returns:
        dict with 'RMSE-mean' and 'RMSE-STD' over all rounds.
    """
    df = pd.read_csv('train.csv.gz', index_col='Id')
    features = df.drop(['revenue'], axis=1)
    target = df.revenue
    # Hoisted out of the loop: the scorer is loop-invariant, so there is
    # no reason to rebuild it on every iteration.
    scorer = make_scorer(rmse)
    scores = []
    for _ in range(n_tests):
        model = sk_clone(est)
        xtr, xtst, ytr, ytst = train_test_split(
            features, target, test_size=.2
        )
        model.fit(xtr, ytr)
        scores.append(scorer(model, xtst, ytst))
    scores = np.array(scores)
    return {'RMSE-mean': scores.mean(), 'RMSE-STD': scores.std()}
def _instantiate_nearest_neighbors_object(self): backend = self.knn_backend if backend == "sklearn": backend_instance = NearestNeighbors(algorithm="auto") elif callable(backend): backend_instance = backend() self.metric = backend_instance.metric elif hasattr(backend, "fit") and hasattr(backend, "kneighbors"): backend_instance = sk_clone(backend) self.metric = backend_instance.metric else: raise NotImplementedError( "`knn_backend` must be either an NearestNeighbors-like object," " a callable returning such an object, or the string \"sklearn\"" ) backend_instance.set_params(**self._get_metric_dict()) return backend_instance
def fit_clone_with_key(estimator, features, labels, key):
    """Fit a fresh clone of *estimator* and return it tagged with *key*.

    Returns:
        tuple: (key, fitted clone).
    """
    fresh = sk_clone(estimator)
    return key, fresh.fit(features, labels)
def kfold_fit(estimator, X, y, folds):
    """Fit one fresh clone of *estimator* per fold.

    Args:
        estimator: unfitted sklearn-style estimator (cloned per fold).
        X, y: full feature matrix and targets (index-sliced per fold).
        folds: iterable of index arrays, one per training fold.

    Returns:
        list of fitted clones, in fold order.
    """
    fitted = []
    for idx in folds:
        clone_for_fold = sk_clone(estimator)
        fitted.append(clone_for_fold.fit(X[idx], y[idx]))
    return fitted
def fit_est_clone(estimator, features, labels):
    """Return a fresh clone of *estimator* fitted on the given data."""
    fresh = sk_clone(estimator)
    return fresh.fit(features, labels)
def fit_clone_with_key(estimator, features, labels, key):
    """Fit a fresh clone of *estimator*; return ``(key, fitted_clone)``.

    The clone import is local so this function can be shipped to worker
    processes without the enclosing module's imports.
    """
    from sklearn.base import clone as sk_clone
    model = sk_clone(estimator)
    fitted = model.fit(features, labels)
    return key, fitted