예제 #1
0
    def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
        """
        Return cross-validation predictions on the training set, using cache
        if possible.

        This is used if stacking is enabled (ie. a second model is used to
        combine the stage 0 predictions).

        When ``load_from_cache`` returns None, the training set is split into
        4 stratified folds. For each fold the model is fitted on the other
        rows and column 1 of ``predict_proba`` is collected for the held-out
        rows. The collected values are then reordered to match the original
        row order of ``X_train`` and pickled to the cache path.

        :param model: estimator exposing ``fit`` and ``predict_proba``.
        :param X_train: training features, indexable by an array of row
            indexes.
        :param y_train: training labels, indexable the same way.
        :param cache_file: base name (without extension) of the cache file.
        :return: array of out-of-fold predictions, one per training row.
        """
        # Build the path once so the load and the save cannot drift apart.
        cache_path = "cache/models/%s/cv_preds/%s.pkl" % (
            self.cache_dir, cache_file)
        stack_preds = load_from_cache(cache_path, self.use_cached_models)

        if stack_preds is None:
            kfold = cross_validation.StratifiedKFold(y_train, 4)
            stack_preds = []
            indexes_cv = []
            for stage0, stack in kfold:
                model.fit(X_train[stage0], y_train[stage0])
                stack_preds.extend(model.predict_proba(X_train[stack])[:, 1])
                indexes_cv.extend(stack)
            # Predictions were gathered fold by fold; sorting by the row
            # indexes puts them back in the original training-set order.
            stack_preds = np.array(stack_preds)[np.argsort(indexes_cv)]

            # Pickle's binary protocols need a binary-mode file: text mode
            # fails on Python 3 and can corrupt the data on Windows.
            with open(cache_path, 'wb') as f:
                pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

        return stack_preds
예제 #2
0
    def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
        """
        Return cross-validation predictions on the training set, using cache
        if possible.

        This is used if stacking is enabled (ie. a second model is used to
        combine the stage 0 predictions).

        When ``load_from_cache`` returns None, the training set is split into
        4 stratified folds. For each fold the model is fitted on the other
        rows and column 1 of ``predict_proba`` is collected for the held-out
        rows. The collected values are then reordered to match the original
        row order of ``X_train`` and pickled to the cache directory.

        :param model: estimator exposing ``fit`` and ``predict_proba``.
        :param X_train: training features, indexable by an array of row
            indexes.
        :param y_train: training labels, indexable the same way.
        :param cache_file: base name (without extension) of the cache file.
        :return: array of out-of-fold predictions, one per training row.
        """
        # NOTE(review): the load path has no "cache/" prefix while the save
        # path below does -- presumably load_from_cache prepends it; confirm.
        stack_preds = load_from_cache(
            "models/%s/cv_preds/%s.pkl" % (self.cache_dir, cache_file),
            self.use_cached_models)

        if stack_preds is None:
            kfold = cross_validation.StratifiedKFold(y_train, 4)
            stack_preds = []
            indexes_cv = []
            for stage0, stack in kfold:
                model.fit(X_train[stage0], y_train[stage0])
                stack_preds.extend(model.predict_proba(X_train[stack])[:, 1])
                indexes_cv.extend(stack)
            # Predictions were gathered fold by fold; sorting by the row
            # indexes puts them back in the original training-set order.
            stack_preds = np.array(stack_preds)[np.argsort(indexes_cv)]

            # The format string must have exactly one placeholder per
            # argument; a stray "%d" here used to raise TypeError after all
            # the folds had already been trained.
            save_path = "cache/models/%s/cv_preds/%s.pkl" % (
                self.cache_dir, cache_file)
            with open(save_path, 'wb') as f:
                pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

        return stack_preds
예제 #3
0
    def _get_model_preds(self, model, X_train, X_predict, y_train, cache_file):
        """
        Return the model predictions on the prediction set,
        using cache if possible.

        The cache entry is a ``(params, predictions)`` pair. It is reused
        only when predictions are present and the stored params compare equal
        to ``model.get_params()``; otherwise the model is refitted on the
        training data, column 1 of ``predict_proba`` is taken for
        ``X_predict`` and the cache file is rewritten.
        """
        cached = load_from_cache(
            "cache/models/%s/%s.pkl" % (self.cache_dir, cache_file),
            self.use_cached_models)

        if cached is None:
            cached_params, predictions = None, None
        else:
            cached_params, predictions = cached

        needs_refit = (predictions is None
                       or cached_params != model.get_params())
        if not needs_refit:
            return predictions

        model.fit(X_train, y_train)
        predictions = model.predict_proba(X_predict)[:, 1]

        # Store the params next to the predictions so a later call can tell
        # whether the cached values belong to the same configuration.
        target = "cache/models/%s/%s.pkl" % (self.cache_dir, cache_file)
        with open(target, 'wb') as out:
            pickle.dump((model.get_params(), predictions), out)

        return predictions
예제 #4
0
    def _get_model_preds(self, model, X_train, X_predict, y_train, cache_file):
        """
        Return the model predictions on the prediction set,
        using cache if possible.

        A cached ``(params, predictions)`` pair is trusted only when it has
        predictions and its params compare equal to ``model.get_params()``.
        In every other case the model is fitted on the training data, column
        1 of ``predict_proba`` is computed for ``X_predict`` and the pair is
        pickled again.
        """
        lookup_path = "models/%s/%s.pkl" % (self.cache_dir, cache_file)
        entry = load_from_cache(lookup_path, self.use_cached_models)

        stored_params, preds = (None, None) if entry is None else entry

        stale = preds is None or stored_params != model.get_params()
        if stale:
            model.fit(X_train, y_train)
            preds = model.predict_proba(X_predict)[:, 1]

            # Keep the params alongside the predictions so that a change in
            # the model configuration invalidates this cache entry.
            payload = (model.get_params(), preds)
            dump_path = "cache/models/%s/%s.pkl" % (
                self.cache_dir, cache_file)
            with open(dump_path, 'wb') as handle:
                pickle.dump(payload, handle)

        return preds