Code Example #1
File: encoding.py  Project: ujjwalagr/AV
    def fit_transform(self, X, y):
        self.fit(X, y)
        kf = CustomFolds(validation_scheme=self.fold_scheme, num_folds=self.folds,
                         random_state=self.random_state, shuffle=self.shuffle)
        X = self.convert_input(X)
        copy_X = deepcopy(X)
        for i, (train_index, test_index) in enumerate(kf.split(copy_X)):
            # Learn the encoding on the training fold only, then apply it to the
            # held-out fold, so no row's encoding is derived from its own target.
            encoding_dict, iv = self.create_encoding_dict(X.iloc[train_index], y[train_index])
            copy_X.iloc[test_index] = self.apply_encoding(X.iloc[test_index], encoding_dict)
        if self.return_df:
            return copy_X
        else:
            return copy_X.values
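
The method above fills each fold's rows with an encoding learned from the remaining folds, so no row's encoding ever uses its own target. Below is a minimal standalone sketch of that out-of-fold target-encoding pattern; it assumes a plain mean-target encoding and uses sklearn's KFold in place of the project's CustomFolds, create_encoding_dict and apply_encoding helpers, which are not shown here.

# Standalone sketch of the out-of-fold target-encoding pattern above, with a
# plain mean encoding and sklearn's KFold standing in for the project's
# CustomFolds, create_encoding_dict and apply_encoding helpers.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

def oof_mean_encode(X, y, column, n_splits=5, random_state=0):
    """Return the fold-wise mean-target encoding of `column` as a Series."""
    encoded = pd.Series(np.nan, index=X.index, dtype=float)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    global_mean = y.mean()
    for train_idx, test_idx in kf.split(X):
        # The encoding is learned on the training fold only...
        means = y.iloc[train_idx].groupby(X[column].iloc[train_idx]).mean()
        # ...and applied to the held-out fold, so no row sees its own target.
        encoded.iloc[test_idx] = (
            X[column].iloc[test_idx].map(means).fillna(global_mean).values
        )
    return encoded

# Example usage
X = pd.DataFrame({"city": ["a", "b", "a", "c", "b", "a"]})
y = pd.Series([1, 0, 1, 0, 1, 0])
print(oof_mean_encode(X, y, "city", n_splits=3))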
Code Example #2
File: custom_estimator.py  Project: ujjwalagr/AV
    def fit(self, x, y, use_oof=False, n_jobs=-1):
        if not hasattr(self.model, 'fit'):
            raise Exception("Model/algorithm needs to implement fit()")
        fitted_models = []
        scaler_models = []

        if use_oof:
            folds = CustomFolds(num_folds=self.n_splits,
                                random_state=random.randint(0, 1000) if self.random_state == 'random' else self.random_state,
                                shuffle=self.shuffle,
                                validation_scheme=self.validation_scheme)
            self.indices = folds.split(x, y, group=self.cv_group_col)
            for i, (train_index, test_index) in enumerate(self.indices):
                model = clone(self.model)
                model.n_jobs = n_jobs
                if (isinstance(model, LGBMClassifier) and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index], y=y[train_index], eval_set=[(x[test_index],y[test_index]),(x[train_index],y[train_index])],
                        verbose=100, eval_metric='auc', early_stopping_rounds=self.early_stopping_rounds)
                elif (isinstance(model, XGBClassifier) and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index], y=y[train_index], eval_set=[(x[test_index],y[test_index])],
                        verbose=100, eval_metric='auc', early_stopping_rounds=self.early_stopping_rounds)
                elif (isinstance(model, CatBoostClassifier) and self.early_stopping_rounds is not None):
                    model.od_wait=int(self.early_stopping_rounds)
                    model.fit(x[train_index], y[train_index], cat_features=self.categorical_features_indices,
                             eval_set=(x[test_index],y[test_index]), use_best_model=True, verbose=100)
                elif isinstance(model, LogisticRegression):
                    model.fit(x[train_index], y[train_index])
                else:
                    model.fit(x[train_index], y[train_index])
                fitted_models.append(model)
        else:
            model = clone(self.model)
            model.n_jobs = n_jobs
            x_train, x_val, y_train, y_val = train_test_split(
                x, y, test_size=0.2, shuffle=True,
                random_state=random.randint(0, 1000) if self.random_state == 'random' else self.random_state)
            if isinstance(model, LGBMClassifier):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train, y=y_train, eval_set=[(x_val,y_val)],
                        verbose=100, eval_metric='auc', early_stopping_rounds=self.early_stopping_rounds)

            elif isinstance(model, XGBClassifier):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train, y=y_train, eval_set=[(x_val,y_val)],
                        verbose=100, eval_metric='auc', early_stopping_rounds=self.early_stopping_rounds)

            model.fit(x, y)
            fitted_models.append(model)
        self.fitted_models = fitted_models
        return self
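
With use_oof=True, the method above clones the base model once per fold, fits each clone on its training split, and keeps every fitted copy for later use. The sketch below illustrates that loop using only sklearn (StratifiedKFold standing in for the project's CustomFolds, LogisticRegression for the boosting models) and shows how the stored folds and models yield out-of-fold predictions; it is an illustration of the pattern, not this project's API.

# Standalone sketch of the use_oof=True path: clone the base model once per
# fold, fit it on the training split, and keep every fitted copy.
# StratifiedKFold and LogisticRegression are stand-ins for the project's
# CustomFolds helper and boosting models.
import numpy as np
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

x, y = make_classification(n_samples=200, random_state=0)
base_model = LogisticRegression(max_iter=1000)

indices = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(x, y))
fitted_models = []
for train_index, test_index in indices:
    model = clone(base_model)              # fresh, unfitted copy per fold
    model.fit(x[train_index], y[train_index])
    fitted_models.append(model)

# Out-of-fold predictions: every row is scored by the model that never saw it.
oof_pred = np.zeros(len(y))
for (train_index, test_index), model in zip(indices, fitted_models):
    oof_pred[test_index] = model.predict_proba(x[test_index])[:, 1]
print("OOF accuracy:", ((oof_pred > 0.5) == y).mean())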
Code Example #3
    def fit(self, x, y, use_oof=False, n_jobs=-1):
        if not hasattr(self.model, 'fit'):
            raise Exception("Model/algorithm needs to implement fit()")
        fitted_models = []
        scaler_models = []

        #         if (isinstance(self.model, CatBoostClassifier)):
        #             if (self.categorical_features_indices is None):
        #                 temp_df=pd.DataFrame(x)
        #                 temp_df=temp_df.apply(pd.to_numeric, errors='ignore')
        #                 self.categorical_features_indices=get_categorical_column_indexes(temp_df,threshold=self.threshold).tolist()
        #             self.encoder = LabelEncoding(categorical_columns=self.categorical_features_indices)
        #             x = self.encoder.fit_transform(x)
        if use_oof:
            folds = CustomFolds(num_folds=self.n_splits,
                                random_state=self.random_state,
                                shuffle=self.shuffle,
                                validation_scheme=self.validation_scheme)
            self.indices = folds.split(x, y, group=self.cv_group_col)

            for i, (train_index, test_index) in enumerate(self.indices):
                model = clone(self.model)
                model.n_jobs = n_jobs
                if (isinstance(model, LGBMClassifier)
                        and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index]),
                                        (x[train_index], y[train_index])],
                              verbose=100,
                              eval_metric='auc',
                              early_stopping_rounds=self.early_stopping_rounds)
                elif (isinstance(model, XGBClassifier)
                      and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index])],
                              verbose=50,
                              eval_metric='auc',
                              early_stopping_rounds=self.early_stopping_rounds)
                elif (isinstance(model, CatBoostClassifier)
                      and self.early_stopping_rounds is not None):
                    model.od_wait = int(self.early_stopping_rounds)
                    model.fit(x[train_index],
                              y[train_index],
                              cat_features=self.categorical_features_indices,
                              eval_set=(x[test_index], y[test_index]),
                              use_best_model=True,
                              verbose=10)
                elif isinstance(model, LogisticRegression):
                    scaler = RobustScaler()
                    xtrain = scaler.fit_transform(x[train_index])
                    scaler_models.append(scaler)
                    model.fit(xtrain, y[train_index])
                else:
                    model.fit(x[train_index], y[train_index])
                fitted_models.append(model)
        else:
            model = clone(self.model)
            model.n_jobs = n_jobs
            x_train, x_val, y_train, y_val = train_test_split(x,
                                                              y,
                                                              test_size=0.2,
                                                              shuffle=True,
                                                              random_state=100)
            if isinstance(model, LGBMClassifier):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train,
                              y=y_train,
                              eval_set=[(x_val, y_val)],
                              verbose=False,
                              eval_metric='auc',
                              early_stopping_rounds=self.early_stopping_rounds)

            elif isinstance(model, XGBClassifier):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train,
                              y=y_train,
                              eval_set=[(x_val, y_val)],
                              verbose=False,
                              eval_metric='auc',
                              early_stopping_rounds=self.early_stopping_rounds)

            model.fit(x, y)
            fitted_models.append(model)
        self.fitted_models = fitted_models
        self.scaler_models = scaler_models
        return self
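
Compared with example #2, the LogisticRegression branch here also fits a RobustScaler per training fold and keeps it in scaler_models next to the corresponding model, so the same scaler can be reused when predicting. A short sketch of that pairing, again with sklearn stand-ins rather than the project's classes:

# Sketch of the LogisticRegression branch: a RobustScaler is fitted on each
# training fold and stored next to its model so the pair can be reused at
# prediction time. Fold handling mirrors the previous sketch.
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import RobustScaler

x, y = make_classification(n_samples=200, random_state=0)
base_model = LogisticRegression(max_iter=1000)

fitted_models, scaler_models = [], []
for train_index, test_index in StratifiedKFold(n_splits=5, shuffle=True,
                                               random_state=0).split(x, y):
    scaler = RobustScaler()
    xtrain = scaler.fit_transform(x[train_index])   # scaler learned on the fold only
    fitted_models.append(clone(base_model).fit(xtrain, y[train_index]))
    scaler_models.append(scaler)                    # paired with fitted_models[i]

# At prediction time, each model's input goes through its own fold's scaler.
probas = [m.predict_proba(s.transform(x)) for m, s in zip(fitted_models, scaler_models)]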
Code Example #4
    def fit(self, x, y, use_oof=False, n_jobs=-1):
        if not hasattr(self.model, 'fit'):
            raise Exception("Model/algorithm needs to implement fit()")

        fitted_models = []
        if use_oof:
            #             kf = StratifiedKFold(n_splits=self.kfolds, random_state=self.random_state, shuffle=self.shuffle)
            #             self.indices = [(train_index, test_index) for (train_index, test_index) in kf.split(x, y)]
            folds = CustomFolds(num_folds=self.n_splits,
                                random_state=self.random_state,
                                shuffle=self.shuffle,
                                validation_scheme=self.validation_scheme)
            self.indices = folds.split(x, y, group=self.cv_group_col)
            for i, (train_index, test_index) in enumerate(self.indices):
                model = clone(self.model)
                model.n_jobs = n_jobs
                if (isinstance(model, LGBMRegressor)
                        and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index]),
                                        (x[train_index], y[train_index])],
                              verbose=100,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)

                elif (isinstance(model, XGBRegressor)
                      and self.early_stopping_rounds is not None):
                    model.fit(X=x[train_index],
                              y=y[train_index],
                              eval_set=[(x[test_index], y[test_index])],
                              verbose=100,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)
                else:
                    #                     model.n_jobs=-1
                    model.fit(x[train_index], y[train_index])
                    print(scoring_metric(y[test_index],
                                         model.predict(x[test_index])) ** 0.5)
                fitted_models.append(model)
        else:
            model = clone(self.model)
            model.n_jobs = n_jobs
            x_train, x_val, y_train, y_val = train_test_split(x,
                                                              y,
                                                              test_size=0.2,
                                                              shuffle=True,
                                                              random_state=100)
            if isinstance(model, LGBMRegressor):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train,
                              y=y_train,
                              eval_set=[(x_val, y_val)],
                              verbose=False,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)

            elif isinstance(model, XGBRegressor):
                if self.early_stopping_rounds is not None:
                    model.fit(X=x_train,
                              y=y_train,
                              eval_set=[(x_val, y_val)],
                              verbose=False,
                              eval_metric='rmse',
                              early_stopping_rounds=self.early_stopping_rounds)

            model.fit(x, y)
            fitted_models.append(model)
        self.fitted_models = fitted_models
        return self
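
The regression variant runs the same fold loop but prints the held-out error after each fit. A compact sketch, assuming scoring_metric is sklearn's mean_squared_error (so the ** 0.5 turns it into RMSE) and using Ridge as a stand-in base model:

# Fold loop for a regressor: fit on the training split, report held-out RMSE.
# mean_squared_error is assumed to match the project's scoring_metric, and
# Ridge stands in for the actual base model.
from sklearn.base import clone
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

x, y = make_regression(n_samples=200, noise=10.0, random_state=0)
base_model = Ridge()

fitted_models = []
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for i, (train_index, test_index) in enumerate(kf.split(x)):
    model = clone(base_model)
    model.fit(x[train_index], y[train_index])
    rmse = mean_squared_error(y[test_index], model.predict(x[test_index])) ** 0.5
    print(f"fold {i}: RMSE = {rmse:.3f}")
    fitted_models.append(model)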