Esempio n. 1
0
    # Build the classifier selected by CLASSIFIER, together with the name of
    # the log-loss metric that is forwarded to it through fit_params.
    if CLASSIFIER == 'xgb':
        metric = 'logloss'
        clf = xgb.XGBClassifier(**PARAMS)
    else:
        metric = 'binary_logloss'
        # Non-xgb branch: work on a copy of PARAMS, replace 'max_depth' with
        # a 'num_leaves' value of 2 ** max_depth and drop 'gamma'.
        lgb_params = PARAMS.copy()
        depth = lgb_params.pop('max_depth')
        lgb_params.pop('gamma')
        lgb_params['num_leaves'] = 2 ** depth
        clf = lgb.LGBMClassifier(**lgb_params)

    fit_params = {'eval_metric': metric}

    # Dispatch on MODE: 'cv' runs cross-validation, 'out' hands both the
    # train and the test set to utils.VJUH; anything else is only reported.
    if MODE == 'cv':
        utils.perform_cv(
            X_train, y_train, clf, '-'.join((MODEL_NAME, CLASSIFIER)),
            fit_params=fit_params,
            stratify_labels=utils.load_stratify_labels(),
        )
    elif MODE == 'out':
        utils.VJUH(
            X_train, X_test, y_train, clf, MODEL_NAME, 'out',
            fit_params=fit_params,
            seed_name='seed',
            runs=RUNS,
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        print('Unsupported mode')
Esempio n. 2
0
    def predict_proba(self, X):
        """Return class probabilities averaged over ``self.num_runs`` runs.

        ``self._split_data`` supplies two feature views of ``X`` and two row
        masks. Rows selected by the first mask are scored by the models in
        ``self.models1``, rows selected by the second mask by the models in
        ``self.models2``. The per-run outputs are summed and divided by
        ``self.num_runs``.
        """
        X1, X2, idx1, idx2 = self._split_data(X)

        # Run 0 seeds the accumulator: the first model scores every row of
        # X1, then the rows of the second partition are overwritten with the
        # output of the matching models2 entry.
        total = self.models1[0].predict_proba(X1)
        total[idx2] = self.models2[0].predict_proba(X2.loc[idx2])

        # Remaining runs are added partition by partition.
        for run in range(1, self.num_runs):
            first_part = self.models1[run].predict_proba(X1.loc[idx1])
            total[idx1] += first_part
            second_part = self.models2[run].predict_proba(X2.loc[idx2])
            total[idx2] += second_part

        return total / self.num_runs

    def _split_data(self, X):
        """Split ``X`` into two column views plus two row masks.

        Returns a tuple ``(X1, X2, idx_1, idx_2)``: ``X1`` / ``X2`` hold the
        columns listed in ``self.x1_columns`` / ``self.x2_columns`` for all
        rows, ``idx_1`` marks rows whose ``'upd_start_level'`` is at most
        ``START_LEVEL_THRESHOLD`` and ``idx_2`` marks rows strictly above it.
        """
        # assumes X is a pandas DataFrame with an 'upd_start_level' column
        start_level = X['upd_start_level']
        # Both comparisons are evaluated explicitly (rather than negating
        # one mask) so that rows comparing False either way stay in neither.
        at_or_below = (start_level <= START_LEVEL_THRESHOLD).values
        above = (start_level > START_LEVEL_THRESHOLD).values
        return (
            X.loc[:, self.x1_columns],
            X.loc[:, self.x2_columns],
            at_or_below,
            above,
        )


if __name__ == '__main__':
    # Script entry point: load the 'extended' data set, build the split
    # classifier and run it in the mode selected by MODE.

    # Seed NumPy's global RNG with a fixed value.
    np.random.seed(2707)

    X_train, X_test, y_train = utils.load_data(data_name='extended')

    clf = XGBoostSplit(num_runs=NUM_RUNS, base=CLASSIFIER)
    if MODE == 'cv':
        # Cross-validation run, labelled with both model and base classifier.
        utils.perform_cv(X_train, y_train, clf, MODEL_NAME + '-' + CLASSIFIER,
                         stratify_labels=utils.load_stratify_labels())
    elif MODE == 'ensemble':
        # Train/test run via utils.VJUH under the 'ensemble' label.
        utils.VJUH(X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
                   stratify_labels=utils.load_stratify_labels())
    else:
        # Any other MODE value is reported and nothing is trained.
        print('Unsupported mode')
Esempio n. 3
0
    import warnings

    # Silence DeprecationWarning for the remainder of the run.
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    X_train, X_test, y_train = utils.load_data('log_flipped')

    # Fit the scaler on train and test rows stacked together, then apply it
    # to each set separately, keeping the original column labels.
    stacked = pd.concat((X_train, X_test), axis=0, ignore_index=True)
    scaler = StandardScaler()
    scaler.fit(stacked)
    train_scaled = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
    test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Encode the labels as int32 class indices.
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train).astype(np.int32)
    num_classes = len(encoder.classes_)

    # Convert the features to float32 arrays before building the network.
    X_train = np.array(train_scaled).astype(np.float32)
    X_test = np.array(test_scaled).astype(np.float32)
    num_features = X_train.shape[1]

    clf = NeuralNet(
        input_shape=(None, num_features),
        output_num_units=num_classes,
        **PARAMS
    )

    # Dispatch on MODE: 'cv' cross-validates on the training data, while
    # 'ensemble' passes train and test data to utils.VJUH.
    if MODE == 'cv':
        utils.perform_cv(
            X_train, y_train, clf, MODEL_NAME,
            stratify_labels=utils.load_stratify_labels(),
        )
    elif MODE == 'ensemble':
        utils.VJUH(
            X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
            lasagne=True,
            runs=RUNS,
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        print('Unsupported mode')