def fit_transform(self, x, y):
    """Fit one model per fold and return out-of-fold predictions.

    Calls ``self.fit`` with ``use_oof=True``, then fills a prediction
    vector by letting each fold's model predict on that fold's held-out
    rows. Also records ``self.cv_scores`` (one value per fold),
    ``self.avg_cv_score`` (their mean) and ``self.overall_cv_score``
    (computed over all rows at once).

    Args:
        x: Feature matrix, indexable by the fold index arrays.
        y: Targets aligned with ``x``.

    Returns:
        1-D array of length ``x.shape[0]`` with the out-of-fold predictions.
    """
    self.fit(x, y, use_oof=True)

    oof_preds = np.zeros((x.shape[0], ))
    for fold_no, (_, holdout_idx) in enumerate(self.indices):
        fold_model = self.fitted_models[fold_no]
        oof_preds[holdout_idx] = fold_model.predict(x[holdout_idx])

    # Every score is the square root of scoring_metric -- presumably turning
    # an MSE into an RMSE; confirm against scoring_metric's definition.
    fold_scores = []
    for _, holdout_idx in self.indices:
        fold_value = scoring_metric(y[holdout_idx], oof_preds[holdout_idx])
        fold_scores.append(fold_value ** 0.5)

    self.cv_scores = fold_scores
    self.avg_cv_score = np.mean(fold_scores)
    self.overall_cv_score = scoring_metric(y, oof_preds) ** 0.5
    return oof_preds
def fit_transform(self, x, y):
    """Fit one model per fold and return out-of-fold predictions.

    Calls ``self.fit`` with ``use_oof=True``. When ``self.model`` is a
    ``LogisticRegression`` the held-out rows are first passed through the
    fold's entry in ``self.scaler_models`` and the second column of
    ``predict_proba`` is stored; for any other model the fold's plain
    ``predict`` output is stored.

    Also records ``self.cv_scores``, ``self.avg_cv_score`` and
    ``self.overall_cv_score``.

    Args:
        x: Feature matrix, indexable by the fold index arrays.
        y: Targets aligned with ``x``.

    Returns:
        1-D array of length ``x.shape[0]`` with the out-of-fold predictions.
    """
    self.fit(x, y, use_oof=True)

    oof_preds = np.zeros((x.shape[0], ))
    for fold_no, (_, holdout_idx) in enumerate(self.indices):
        if not isinstance(self.model, LogisticRegression):
            oof_preds[holdout_idx] = self.fitted_models[fold_no].predict(
                x[holdout_idx])
            continue
        scaled_holdout = self.scaler_models[fold_no].transform(x[holdout_idx])
        fold_proba = self.fitted_models[fold_no].predict_proba(scaled_holdout)
        oof_preds[holdout_idx] = fold_proba[:, 1]

    fold_scores = []
    for _, holdout_idx in self.indices:
        fold_value = scoring_metric(y[holdout_idx], oof_preds[holdout_idx])
        fold_scores.append(fold_value ** 0.5)

    self.cv_scores = fold_scores
    self.avg_cv_score = np.mean(fold_scores)
    # NOTE(review): per-fold scores are square-rooted but the overall score
    # is not -- confirm whether this asymmetry is intended.
    self.overall_cv_score = scoring_metric(y, oof_preds)
    return oof_preds
def fit_transform(self, x, y):
    """Fit one model per fold and return out-of-fold predictions.

    Calls ``self.fit`` with ``use_oof=True``. Models whose class name is
    ``'SVC'`` or ``'SVR'`` contribute their ``predict`` output; every other
    model (including ``'LogisticRegression'``) contributes the second column
    of ``predict_proba``.

    Also records ``self.cv_scores`` (raw ``scoring_metric`` per fold),
    ``self.avg_cv_score`` and ``self.overall_cv_score``.

    Args:
        x: Feature matrix, indexable by the fold index arrays.
        y: Targets aligned with ``x``.

    Returns:
        1-D array of length ``x.shape[0]`` with the out-of-fold predictions.
    """
    self.fit(x, y, use_oof=True)

    # Dispatch on the class name of the template model, not the fitted clone.
    model_name = self.model.__class__.__name__
    uses_plain_predict = model_name in ('SVC', 'SVR')

    oof_preds = np.zeros((x.shape[0],))
    for fold_no, (_, holdout_idx) in enumerate(self.indices):
        fold_model = self.fitted_models[fold_no]
        if uses_plain_predict:
            oof_preds[holdout_idx] = fold_model.predict(x[holdout_idx])
        else:
            oof_preds[holdout_idx] = fold_model.predict_proba(x[holdout_idx])[:, 1]

    fold_scores = []
    for _, holdout_idx in self.indices:
        fold_scores.append(
            scoring_metric(y[holdout_idx], oof_preds[holdout_idx]))

    self.cv_scores = fold_scores
    self.avg_cv_score = np.mean(fold_scores)
    self.overall_cv_score = scoring_metric(y, oof_preds)
    return oof_preds
def fit(self, x, y, use_oof=False, n_jobs=-1):
    """Fit clones of ``self.model`` and store them in ``self.fitted_models``.

    With ``use_oof=True`` the data is split by ``CustomFolds``; the resulting
    (train, test) index pairs are kept in ``self.indices`` and one clone is
    trained per fold. Otherwise a single clone is trained on all of
    ``x``/``y``.

    Args:
        x: Feature matrix, indexable by the fold index objects.
        y: Targets aligned with ``x``.
        use_oof: When true, train one model per cross-validation fold.
        n_jobs: Value assigned to the ``n_jobs`` attribute of every clone.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        Exception: If ``self.model`` does not expose a ``fit`` attribute.
    """
    if not hasattr(self.model, 'fit'):
        raise Exception("Model/algorithm needs to implement fit()")

    fitted_models = []
    if use_oof:
        folds = CustomFolds(num_folds=self.n_splits,
                            random_state=self.random_state,
                            shuffle=self.shuffle,
                            validation_scheme=self.validation_scheme)
        self.indices = folds.split(x, y, group=self.cv_group_col)
        for train_index, test_index in self.indices:
            model = clone(self.model)
            model.n_jobs = n_jobs
            if (isinstance(model, LGBMRegressor)
                    and self.early_stopping_rounds is not None):
                # LightGBM is monitored on both the held-out and training folds.
                model.fit(X=x[train_index], y=y[train_index],
                          eval_set=[(x[test_index], y[test_index]),
                                    (x[train_index], y[train_index])],
                          verbose=100, eval_metric='rmse',
                          early_stopping_rounds=self.early_stopping_rounds)
            elif (isinstance(model, XGBRegressor)
                    and self.early_stopping_rounds is not None):
                model.fit(X=x[train_index], y=y[train_index],
                          eval_set=[(x[test_index], y[test_index])],
                          verbose=100, eval_metric='rmse',
                          early_stopping_rounds=self.early_stopping_rounds)
            else:
                model.fit(x[train_index], y[train_index])
            # Single-argument print() call: valid on both Python 2 and 3,
            # unlike the bare ``print expr`` statement.
            print(scoring_metric(y[test_index],
                                 model.predict(x[test_index])) ** 0.5)
            fitted_models.append(model)
    else:
        model = clone(self.model)
        model.n_jobs = n_jobs
        x_train, x_val, y_train, y_val = train_test_split(
            x, y, test_size=0.2, shuffle=True, random_state=100)
        if (isinstance(model, (LGBMRegressor, XGBRegressor))
                and self.early_stopping_rounds is not None):
            model.fit(X=x_train, y=y_train, eval_set=[(x_val, y_val)],
                      verbose=False, eval_metric='rmse',
                      early_stopping_rounds=self.early_stopping_rounds)
        # NOTE(review): this refit on the full data appears to discard the
        # early-stopped fit above -- confirm that is intended.
        model.fit(x, y)
        fitted_models.append(model)

    self.fitted_models = fitted_models
    return self