def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
    """
    Return cross-validation predictions on the training set, using cache
    if possible.

    This is used if stacking is enabled (ie. a second model is used to
    combine the stage 0 predictions).

    On a cache miss (``load_from_cache`` returns None) the model is fitted
    on each of 4 stratified folds, the positive-class probability
    (column 1 of ``predict_proba``) is collected for every held-out row,
    and the results are reordered to match the row order of ``X_train``
    before being pickled to the cache file.

    :param model: estimator exposing ``fit`` and ``predict_proba``.
    :param X_train: training features, indexable by an array of row indexes.
    :param y_train: training labels, indexable the same way.
    :param cache_file: base name (without extension) of the cache entry.
    :return: array of out-of-fold predictions, one per training row.
    """
    cache_path = "cache/models/%s/cv_preds/%s.pkl" % (
        self.cache_dir, cache_file)

    stack_preds = load_from_cache(cache_path, self.use_cached_models)
    if stack_preds is not None:
        return stack_preds

    kfold = cross_validation.StratifiedKFold(y_train, 4)
    fold_preds = []
    indexes_cv = []
    for stage0, stack in kfold:
        # Fit on the stage-0 part, predict on the held-out part.
        model.fit(X_train[stage0], y_train[stage0])
        fold_preds.extend(model.predict_proba(X_train[stack])[:, 1])
        indexes_cv.extend(stack)

    # Folds visit rows out of order; argsort of the visited indexes puts
    # the predictions back in the original row order.
    stack_preds = np.array(fold_preds)[np.argsort(indexes_cv)]

    # Pickle data is binary: the file must be opened in 'wb' mode
    # (text mode fails on Python 3 and can corrupt the data on Windows).
    with open(cache_path, 'wb') as f:
        pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

    return stack_preds
def _get_model_cv_preds(self, model, X_train, y_train, cache_file):
    """
    Return cross-validation predictions on the training set, using cache
    if possible.

    This is used if stacking is enabled (ie. a second model is used to
    combine the stage 0 predictions).

    On a cache miss (``load_from_cache`` returns None) the model is fitted
    on each of 4 stratified folds, the positive-class probability
    (column 1 of ``predict_proba``) is collected for every held-out row,
    and the results are reordered to match the row order of ``X_train``
    before being pickled to the cache file.

    :param model: estimator exposing ``fit`` and ``predict_proba``.
    :param X_train: training features, indexable by an array of row indexes.
    :param y_train: training labels, indexable the same way.
    :param cache_file: base name (without extension) of the cache entry.
    :return: array of out-of-fold predictions, one per training row.
    """
    # NOTE(review): the load key has no "cache/" prefix while the write
    # path below does -- presumably load_from_cache prepends it; confirm.
    stack_preds = load_from_cache(
        "models/%s/cv_preds/%s.pkl" % (self.cache_dir, cache_file),
        self.use_cached_models)
    if stack_preds is not None:
        return stack_preds

    kfold = cross_validation.StratifiedKFold(y_train, 4)
    fold_preds = []
    indexes_cv = []
    for stage0, stack in kfold:
        # Fit on the stage-0 part, predict on the held-out part.
        model.fit(X_train[stage0], y_train[stage0])
        fold_preds.extend(model.predict_proba(X_train[stack])[:, 1])
        indexes_cv.extend(stack)

    # Folds visit rows out of order; argsort of the visited indexes puts
    # the predictions back in the original row order.
    stack_preds = np.array(fold_preds)[np.argsort(indexes_cv)]

    # The format string must have exactly one placeholder per argument;
    # a stray "%d" here raised TypeError after all folds had been fitted.
    write_path = "cache/models/%s/cv_preds/%s.pkl" % (
        self.cache_dir, cache_file)
    with open(write_path, 'wb') as f:
        pickle.dump(stack_preds, f, pickle.HIGHEST_PROTOCOL)

    return stack_preds
def _get_model_preds(self, model, X_train, X_predict, y_train, cache_file):
    """
    Return the model predictions on the prediction set, using cache if
    possible.

    The cache entry is a ``(params, predictions)`` pair. It is reused only
    when it exists and its stored params equal ``model.get_params()``;
    otherwise the model is refitted on the training data, the
    positive-class probabilities (column 1 of ``predict_proba``) are
    computed for ``X_predict``, and the cache file is rewritten.
    """
    pickle_path = "cache/models/%s/%s.pkl" % (self.cache_dir, cache_file)

    cached = load_from_cache(pickle_path, self.use_cached_models)
    if cached is None:
        cached_params, model_preds = None, None
    else:
        cached_params, model_preds = cached

    # get_params() is only consulted when cached predictions exist.
    must_refit = model_preds is None or cached_params != model.get_params()
    if must_refit:
        model.fit(X_train, y_train)
        model_preds = model.predict_proba(X_predict)[:, 1]
        entry = (model.get_params(), model_preds)
        with open(pickle_path, 'wb') as out:
            pickle.dump(entry, out)

    return model_preds
def _get_model_preds(self, model, X_train, X_predict, y_train, cache_file):
    """
    Return the model predictions on the prediction set, using cache if
    possible.

    The cache entry is a ``(params, predictions)`` pair. It is reused only
    when it exists and its stored params equal ``model.get_params()``;
    otherwise the model is refitted on the training data, the
    positive-class probabilities (column 1 of ``predict_proba``) are
    computed for ``X_predict``, and the cache file is rewritten.
    """
    name_parts = (self.cache_dir, cache_file)

    # The lookup key and the on-disk write path are distinct strings.
    cached = load_from_cache(
        "models/%s/%s.pkl" % name_parts, self.use_cached_models)
    if cached is None:
        cached_params, model_preds = None, None
    else:
        cached_params, model_preds = cached

    # get_params() is only consulted when cached predictions exist.
    must_refit = model_preds is None or cached_params != model.get_params()
    if must_refit:
        model.fit(X_train, y_train)
        model_preds = model.predict_proba(X_predict)[:, 1]
        entry = (model.get_params(), model_preds)
        with open("cache/models/%s/%s.pkl" % name_parts, 'wb') as out:
            pickle.dump(entry, out)

    return model_preds