Ejemplo n.º 1
0
    def fit_transform(self, x, y):
        """Fit the per-fold models and return their out-of-fold predictions.

        Calls ``self.fit(x, y, use_oof=True)`` and then fills a 1-D array of
        length ``x.shape[0]``: the i-th entry of ``self.fitted_models``
        predicts the rows selected by the i-th test split in
        ``self.indices``. Rows that belong to no test split stay at 0.

        Side effects: sets ``self.cv_scores`` (one value per fold),
        ``self.avg_cv_score`` (their mean) and ``self.overall_cv_score``
        (computed over all rows). Every score is
        ``scoring_metric(...) ** 0.5``.

        Returns:
            The array of out-of-fold predictions.
        """
        self.fit(x, y, use_oof=True)

        oof_preds = np.zeros(x.shape[0])
        for fold_no, (_, holdout_idx) in enumerate(self.indices):
            fold_model = self.fitted_models[fold_no]
            oof_preds[holdout_idx] = fold_model.predict(x[holdout_idx])

        # Scored in a separate pass so each fold is evaluated against the
        # finished prediction array.
        fold_scores = []
        for _, holdout_idx in self.indices:
            fold_error = scoring_metric(y[holdout_idx], oof_preds[holdout_idx])
            fold_scores.append(fold_error**0.5)

        self.cv_scores = fold_scores
        self.avg_cv_score = np.mean(fold_scores)
        self.overall_cv_score = scoring_metric(y, oof_preds)**0.5
        return oof_preds
    def fit_transform(self, x, y):
        """Fit the per-fold models and return their out-of-fold predictions.

        Calls ``self.fit(x, y, use_oof=True)`` and then fills a 1-D array of
        length ``x.shape[0]`` fold by fold, using the i-th fitted model for
        the i-th test split in ``self.indices``:

        * when ``self.model`` is a ``LogisticRegression``, the rows are first
          passed through ``self.scaler_models[i].transform`` and column 1 of
          ``predict_proba`` is stored;
        * otherwise the plain ``predict`` output on the raw rows is stored.

        Side effects: sets ``self.cv_scores`` (per fold,
        ``scoring_metric(...) ** 0.5``), ``self.avg_cv_score`` (their mean)
        and ``self.overall_cv_score`` (``scoring_metric`` over all rows).

        Returns:
            The array of out-of-fold predictions.
        """
        self.fit(x, y, use_oof=True)

        oof_preds = np.zeros(x.shape[0])
        for fold_no, (_, holdout_idx) in enumerate(self.indices):
            if not isinstance(self.model, LogisticRegression):
                oof_preds[holdout_idx] = self.fitted_models[fold_no].predict(
                    x[holdout_idx])
                continue
            fold_model = self.fitted_models[fold_no]
            scaled_rows = self.scaler_models[fold_no].transform(x[holdout_idx])
            oof_preds[holdout_idx] = fold_model.predict_proba(scaled_rows)[:, 1]

        # Scored in a separate pass so each fold is evaluated against the
        # finished prediction array.
        fold_scores = []
        for _, holdout_idx in self.indices:
            fold_error = scoring_metric(y[holdout_idx], oof_preds[holdout_idx])
            fold_scores.append(fold_error**0.5)

        self.cv_scores = fold_scores
        self.avg_cv_score = np.mean(fold_scores)
        # NOTE(review): the per-fold scores take ** 0.5 but the overall score
        # does not, so the two are on different scales -- confirm whether
        # that is intended.
        self.overall_cv_score = scoring_metric(y, oof_preds)
        return oof_preds
    def fit_transform(self, x, y):
        """Fit the per-fold models and return their out-of-fold predictions.

        Calls ``self.fit(x, y, use_oof=True)`` and then fills a 1-D array of
        length ``x.shape[0]`` fold by fold, using the i-th fitted model for
        the i-th test split in ``self.indices``. When the class name of
        ``self.model`` is ``'SVC'`` or ``'SVR'`` the ``predict`` output is
        stored; for every other model column 1 of ``predict_proba`` is
        stored.

        Side effects: sets ``self.cv_scores`` (``scoring_metric`` per fold),
        ``self.avg_cv_score`` (their mean) and ``self.overall_cv_score``
        (``scoring_metric`` over all rows).

        Returns:
            The array of out-of-fold predictions.
        """
        self.fit(x, y, use_oof=True)

        oof_preds = np.zeros(x.shape[0])
        for fold_no, (_, holdout_idx) in enumerate(self.indices):
            fold_model = self.fitted_models[fold_no]
            # Only the SVC/SVR models go through predict(); everything else,
            # LogisticRegression included, goes through predict_proba().
            if self.model.__class__.__name__ in ('SVC', 'SVR'):
                fold_output = fold_model.predict(x[holdout_idx])
            else:
                fold_output = fold_model.predict_proba(x[holdout_idx])[:, 1]
            oof_preds[holdout_idx] = fold_output

        # Scored in a separate pass so each fold is evaluated against the
        # finished prediction array.
        fold_scores = []
        for _, holdout_idx in self.indices:
            fold_scores.append(
                scoring_metric(y[holdout_idx], oof_preds[holdout_idx]))

        self.cv_scores = fold_scores
        self.avg_cv_score = np.mean(fold_scores)
        self.overall_cv_score = scoring_metric(y, oof_preds)
        return oof_preds
Ejemplo n.º 4
0
    def fit(self, x, y, use_oof=False, n_jobs=-1):
        """Fit clones of ``self.model`` and store them in ``self.fitted_models``.

        Args:
            x: Feature data; indexed with the index arrays of each fold.
            y: Targets aligned with ``x``.
            use_oof: When true, build folds with ``CustomFolds`` (stored in
                ``self.indices``) and fit one clone per fold on that fold's
                training rows. When false, fit a single clone on all of
                ``x`` / ``y``.
            n_jobs: Value assigned to the ``n_jobs`` attribute of every clone.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            Exception: if ``self.model`` has no ``fit`` attribute.
        """
        if not hasattr(self.model, 'fit'):
            raise Exception("Model/algorithm needs to implement fit()")

        fitted_models = []
        if use_oof:
            folds = CustomFolds(num_folds=self.n_splits,
                                random_state=self.random_state,
                                shuffle=self.shuffle,
                                validation_scheme=self.validation_scheme)
            self.indices = folds.split(x, y, group=self.cv_group_col)
            for train_index, test_index in self.indices:
                model = clone(self.model)
                model.n_jobs = n_jobs
                if (isinstance(model, LGBMRegressor)
                        and self.early_stopping_rounds is not None):
                    # LightGBM is evaluated on both the held-out and the
                    # training rows of the fold.
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index]),
                                        (x[train_index], y[train_index])],
                              verbose=100,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)
                elif (isinstance(model, XGBRegressor)
                      and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index])],
                              verbose=100,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)
                else:
                    model.fit(x[train_index], y[train_index])
                    # Function-call form prints the same value on Python 2
                    # and is valid syntax on Python 3.
                    print(scoring_metric(y[test_index],
                                         model.predict(x[test_index]))**0.5)
                fitted_models.append(model)
        else:
            model = clone(self.model)
            model.n_jobs = n_jobs
            if (isinstance(model, (LGBMRegressor, XGBRegressor))
                    and self.early_stopping_rounds is not None):
                # The hold-out split is only needed for the early-stopping
                # fit, so it is not computed for other configurations.
                x_train, x_val, y_train, y_val = train_test_split(
                    x, y, test_size=0.2, shuffle=True, random_state=100)
                model.fit(X=x_train,
                          y=y_train,
                          eval_set=[(x_val, y_val)],
                          verbose=False,
                          eval_metric='rmse',
                          early_stopping_rounds=self.early_stopping_rounds)

            # NOTE(review): this refit on the full data runs even after the
            # early-stopping fit above and passes no early-stopping
            # arguments -- confirm whether the stopped iteration count was
            # meant to be carried over.
            model.fit(x, y)
            fitted_models.append(model)
        self.fitted_models = fitted_models
        return self