def forward_selection(self, x_train, y_train, x_test, folds, cols):
        """Greedily add candidate columns while the CV score keeps improving.

        The candidate columns are first removed from the feature frames and a
        baseline score is computed on what is left.  Each round then tries
        every remaining candidate on its own, and the one that raises
        ``cv.trn_gini`` the most above ``self.current_best`` is added
        permanently.  The search stops as soon as a round brings no
        improvement, or when all candidates have been added.

        Results are stored on the instance: selected columns are appended to
        ``self.cols``, the baseline and each accepted score are appended to
        ``self.scores``, and ``self.current_best`` holds the best score seen.

        Side effects on the arguments (kept for backward compatibility):
        ``x_train`` and ``x_test`` have the candidate columns dropped in
        place, and every selected column is removed from ``cols``.

        :param x_train: training features (pandas DataFrame containing
            ``cols``)
        :param y_train: training target, passed through to the validator
        :param x_test: test features (pandas DataFrame containing ``cols``)
        :param folds: fold definition, passed through to the validator
        :param cols: list of candidate column labels
        :return: None
        """
        cv = Cross_Validate(None,
                            n_splits=self.n_splits,
                            len_trn=x_train.shape[0],
                            len_tst=x_test.shape[0],
                            clf=-1,
                            params=self.params,
                            max_round=self.max_round)

        # Set the candidates aside, then strip them from the working frames.
        x_train_cols = x_train[cols]
        x_test_cols = x_test[cols]

        x_train.drop(cols, axis=1, inplace=True)
        x_test.drop(cols, axis=1, inplace=True)

        # Baseline: score without any of the candidate columns.
        cv.cross_validate_xgb(x_train, y_train, x_test, folds)
        self.current_best = cv.trn_gini
        self.scores.append(self.current_best)

        # At most one column is accepted per round, so len(cols) rounds
        # is an upper bound (the range is fixed before cols shrinks).
        for round_no in range(1, len(cols) + 1):
            print("Round %i" % round_no)
            # Single call that is valid on both Python 2 and Python 3.
            print("Shape of train %s" % (x_train.shape,))

            # None (rather than 0) marks "no improvement", so a column
            # that happens to be labelled 0 can still be selected.
            best_col = None

            for col in cols:
                # Build throw-away trial frames instead of concatenating
                # and then dropping again: the old code passed the column's
                # values (not its label) to drop(), so the trial column was
                # never removed correctly.
                trial_train = pd.concat([x_train, x_train_cols[col]], axis=1)
                trial_test = pd.concat([x_test, x_test_cols[col]], axis=1)

                cv.cross_validate_xgb(trial_train, y_train, trial_test, folds)

                if cv.trn_gini > self.current_best:
                    self.current_best = cv.trn_gini
                    best_col = col
                    self.col_temp = col

            if best_col is None:
                # No candidate improved the score in this round.
                break

            # Make the round's winner a permanent feature.
            cols.remove(best_col)
            x_train = pd.concat([x_train, x_train_cols[best_col]], axis=1)
            x_test = pd.concat([x_test, x_test_cols[best_col]], axis=1)

            self.cols.append(best_col)
            self.scores.append(self.current_best)
            self.col_temp = 0
Ejemplo n.º 2
0
def xgb03(x_train, y_train, x_test, folds, max_round, n_splits=5):
    """Run the ``xgb03`` configuration through cross validation.

    The inputs are first passed through ``feature_engineering_3``; the
    resulting frames are then cross validated with a fixed set of booster
    parameters.

    :param x_train: training features
    :param y_train: training target
    :param x_test: test features
    :param folds: fold definition, passed through to the validator
    :param max_round: passed through to ``Cross_Validate``
    :param n_splits: passed through to ``Cross_Validate`` (default 5)
    :return: tuple ``(cv.trn_gini, cv.y_trn, cv.y_tst, cv.fscore)`` as
        populated by the validator
    """
    booster_params = {
        'max_depth': 4,
        'objective': "binary:logistic",
        'eta': 0.025,  # learning rate
        'subsample': 0.9,
        'min_child_weight': 100,
        'colsample_bytree': 0.7,
        'gamma': 0.60,
        'n_jobs': -1,
        'reg_alpha': 4,
        # 'reg_lambda': 5,
        'silent': 1,
    }

    # Model-specific preprocessing; the row counts below are taken from
    # the engineered frames, not from the raw inputs.
    x_train, x_test = feature_engineering_3(x_train, x_test, y_train)
    n_train_rows = x_train.shape[0]
    n_test_rows = x_test.shape[0]

    validator = Cross_Validate(xgb03.__name__, n_splits, n_train_rows,
                               n_test_rows, -1, booster_params, max_round)
    validator.cross_validate_xgb(x_train, y_train, x_test, folds,
                                 verbose_eval=100)

    return (validator.trn_gini, validator.y_trn, validator.y_tst,
            validator.fscore)
Ejemplo n.º 3
0
    def tune_seq(self, x_train, y_train, x_test, folds, verbose_eval=False):
        """Tune parameters sequentially, one parameter at a time.

        For every key in ``self.params_dict`` each candidate value is
        cross validated on top of the current ``self.params``.  The value
        with the highest ``cv.trn_gini`` is then written into
        ``self.params`` before the next key is tuned.

        Every trial (its parameters plus a ``'score'`` entry) is recorded in
        ``self.sframe``, which is transposed and re-indexed at the end so
        that each trial ends up as one row (assumes ``self.sframe`` starts
        out as a pandas DataFrame -- confirm against the constructor).

        :param x_train: training features
        :param y_train: training target
        :param x_test: test features
        :param folds: fold definition, passed through to the validator
        :param verbose_eval: passed through to ``cross_validate_xgb``
        :return: None
        """
        print("\ntuning starts...")

        for key, candidates in self.params_dict.items():
            # Track the winner locally so that a stale threshold cannot
            # cause the parameter to be overwritten with a placeholder.
            best_item = None
            best_score = None

            for item in candidates:
                # %s (not %f) so non-numeric candidates do not crash here.
                print('Tuning for parameter %s with value %s' % (key, item))

                # Work on a copy: aliasing self.params would leak every
                # trial value and the 'score' entry into the real params.
                self.params_temp = dict(self.params)
                self.params_temp.update({key: item})

                cv = Cross_Validate(None,
                                    n_splits=self.n_splits,
                                    len_trn=x_train.shape[0],
                                    len_tst=x_test.shape[0],
                                    clf=-1,
                                    params=self.params_temp,
                                    max_round=self.max_round)
                cv.cross_validate_xgb(x_train, y_train, x_test, folds,
                                      verbose_eval)

                # Log the trial: its parameters together with its score.
                self.params_temp.update({'score': cv.trn_gini})
                trial = pd.Series(list(self.params_temp.values()),
                                  index=list(self.params_temp.keys()))
                self.sframe = pd.concat([self.sframe, trial], axis=1)

                if best_score is None or cv.trn_gini > best_score:
                    best_item = item
                    best_score = cv.trn_gini

            # Only commit a value when at least one candidate was tried.
            if best_score is not None:
                self.params.update({key: best_item})

            # Leave the bookkeeping attributes as the original code did.
            self.max_item = 0
            self.max_score = 0

        self.sframe = self.sframe.transpose().reset_index()