Esempio n. 1
0
    def fit(self, external_cols=None):
        """Fit every base model, collect their predictions, then fit the ensemble.

        For each entry of ``self.models`` a ``self.base_predictor`` is built on
        ``self.X`` / ``self.y``, fitted, and its ``predict()`` output is folded
        into ``self.predictions_cv`` under a column named after the model.
        The first prediction frame is kept whole (with its ``cv_col`` renamed);
        every later frame contributes only its ``cv_col`` column, joined on the
        index.  Each predictor then has ``fit_test()`` called and is stored in
        ``self.predictors`` keyed by model name.  Finally ``fit_ensemble`` is
        invoked.

        :param external_cols: forwarded unchanged to ``self.fit_ensemble``.
        :raises AssertionError: if a predictor returns predictions whose index
            contains duplicate labels.
        """
        self.predictors = {}
        self.predictions_cv = pd.DataFrame()
        for i_model, model in enumerate(self.models):
            model_name = stringify2(model, i_model)
            if self.verbose:
                logd(model_name)
            t0 = u.t()
            i_predictor = self.base_predictor(self.X, self.y, model, scoring=self.scoring,
                                              n_folds=self.n_folds, random_state=self.random_state,
                                              shuffle=self.shuffle, n_jobs=self.predictors_n_jobs,
                                              preprocessor=self.preprocessor, verbose=self.verbose)
            i_predictor.fit()
            i_prediction_cv = i_predictor.predict()

            # The frames are joined on their index below; duplicate labels on
            # either side would turn that join into a many-to-many match and
            # silently multiply rows, so fail loudly instead.
            assert i_prediction_cv.index.is_unique, \
                "Duplicate index values in predictions of %s" % model_name

            renaming = {i_predictor.cv_col: model_name}
            if not len(self.predictions_cv):
                # Nothing accumulated yet: keep every column of this frame.
                self.predictions_cv = i_prediction_cv.rename(columns=renaming)
            else:
                # Only the prediction column of later models is appended.
                df = i_prediction_cv[[i_predictor.cv_col]].rename(columns=renaming)
                self.predictions_cv = self.predictions_cv.merge(df, left_index=True, right_index=True)

            i_predictor.fit_test()
            self.predictors[model_name] = i_predictor
            if self.verbose:
                logd("Fit %s in %2.2f seconds" % (model_name, u.td(t0)))
        self.fit_ensemble(external_cols=external_cols)
Esempio n. 2
0
    def __init__(self, x, y, models, ensemble_model, scoring=None, n_folds=3, random_state=SEED,
                 shuffle=False, n_jobs=-1, stratified=False, preprocessor=None, verbose=0,
                 ensemble_grid_params=None, score_greater_is_better=False):
        """Store the configuration of the ensemble and of its base predictors.

        :param x: feature data, stored as ``self.X``.
        :param y: target data, stored as ``self.y``.
        :param models: list, tuple or set of base models.
        :param ensemble_model: ``sklearn.base.BaseEstimator`` instance.
        :param scoring: stored as ``self.scoring``.
        :param n_folds: stored as ``self.n_folds``.
        :param random_state: stored as ``self.random_state``.
        :param shuffle: stored as ``self.shuffle``.
        :param n_jobs: stored as ``self.predictors_n_jobs``.
        :param stratified: when truthy ``KStratifiedPred`` is used as the base
            predictor class, otherwise ``KFoldPred``.
        :param preprocessor: stored as ``self.preprocessor``.
        :param verbose: stored as ``self.verbose``.
        :param ensemble_grid_params: stored as ``self.ensemble_grid_params``.
        :param score_greater_is_better: stored as ``self.score_greater_is_better``.
        :raises AssertionError: if ``models`` is not a list/tuple/set or
            ``ensemble_model`` is not a ``BaseEstimator``.
        """

        assert isinstance(models, (list, tuple, set)), type(models)
        # Report the expected class itself; type(BaseEstimator) would only
        # print its metaclass, which makes the message useless.
        assert isinstance(ensemble_model, sklearn.base.BaseEstimator), \
            "%s != %s" % (type(ensemble_model), sklearn.base.BaseEstimator)
        self.X = x
        self.y = y
        self.ensemble_model = ensemble_model
        self.n_folds = n_folds
        self.shuffle = shuffle
        self.models = models
        self.stratified = stratified
        self.random_state = random_state
        self.predictors_n_jobs = n_jobs
        self.scoring = scoring
        self.preprocessor = preprocessor
        self.verbose = verbose
        self.ensemble_scaler = None
        self.score_greater_is_better = score_greater_is_better
        self.base_predictor = KStratifiedPred if self.stratified else KFoldPred
        # TODO n_jobs split ensemble and CV

        self.true_col = "TRUE"
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which would be empty after the first time the columns are read.
        self.cols = ["%s" % stringify2(model, i_model) for i_model, model in enumerate(models)]
        self.predictions_cv = None
        self.predictions = None
        self.ensemble_grid_params = ensemble_grid_params
        self.predictors = {}