Ejemplo n.º 1
0
    def predict_proba(self, X):
        """Average the per-run predictions of the two model families.

        ``_split_data`` supplies two feature views and two row masks.  Rows
        under the first mask are scored by ``self.models1``, rows under the
        second mask by ``self.models2``.  Predictions from every run are
        summed and the sum is divided by ``self.num_runs``.

        :param X: input table, forwarded unchanged to ``_split_data``.
        :return: accumulated predictions divided by ``self.num_runs``.
        """
        view_a, view_b, mask_a, mask_b = self._split_data(X)

        # Run 0 seeds the accumulator: the first family scores every row of
        # its view, then the second family's rows are overwritten in place.
        total = self.models1[0].predict_proba(view_a)
        total[mask_b] = self.models2[0].predict_proba(view_b.loc[mask_b])

        # Every later run is added on top, each family on its own rows only.
        for run in range(1, self.num_runs):
            model_a = self.models1[run]
            total[mask_a] += model_a.predict_proba(view_a.loc[mask_a])
            model_b = self.models2[run]
            total[mask_b] += model_b.predict_proba(view_b.loc[mask_b])

        return total / self.num_runs

    def _split_data(self, X):
        """Build the two feature views of ``X`` and the matching row masks.

        A row falls in the first group when its ``'upd_start_level'`` value is
        less than or equal to ``START_LEVEL_THRESHOLD`` and in the second
        group when it is strictly greater.

        :param X: table indexable by column label and supporting ``.loc``.
        :return: ``(X1, X2, idx_1, idx_2)`` -- all rows of ``X`` restricted to
            ``self.x1_columns`` and to ``self.x2_columns`` (rows are not
            filtered here), followed by the ``.values`` of the two
            comparisons, usable as row masks.
        """
        start_level = X['upd_start_level']
        at_or_below = (start_level <= START_LEVEL_THRESHOLD).values
        above = (start_level > START_LEVEL_THRESHOLD).values

        first_view = X.loc[:, self.x1_columns]
        second_view = X.loc[:, self.x2_columns]
        return first_view, second_view, at_or_below, above


if __name__ == '__main__':
    # Seed NumPy's global random generator with a fixed value.
    np.random.seed(2707)

    X_train, X_test, y_train = utils.load_data(data_name='extended')

    clf = XGBoostSplit(num_runs=NUM_RUNS, base=CLASSIFIER)

    # MODE selects what is done with the classifier; any value other than
    # 'cv' or 'ensemble' only prints a message.
    if MODE == 'cv':
        # Cross-validation uses the training data only.
        utils.perform_cv(X_train, y_train, clf, MODEL_NAME + '-' + CLASSIFIER,
                         stratify_labels=utils.load_stratify_labels())
    elif MODE == 'ensemble':
        # NOTE(review): utils.VJUH is a project helper not visible here;
        # presumably it stores predictions under the 'ensemble' folder -- confirm.
        utils.VJUH(X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble', stratify_labels=utils.load_stratify_labels())
    else:
        # Message fixed: the original literal contained a stray '#'.
        print('Unsupported mode')
Ejemplo n.º 2
0
    # Wrap the scaler output back into DataFrames, keeping the original
    # column names.
    X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Shuffled stratified splitter with a fixed seed; it is handed to the
    # probability calibrator below as its `cv` argument.
    kf = StratifiedKFold(n_splits=CALIBRATION_CV,
                         shuffle=True,
                         random_state=4891)
    # One calibrated k-NN model per power of two: n_neighbors = 2, 4, ...,
    # 2**MAX_POWER.  Each model is passed to utils.VJUH under the name
    # MODEL_NAME + str(i).
    # NOTE(review): presumably utils.VJUH writes that model's predictions into
    # the 'ensemble' folder, since they are loaded back from there below --
    # confirm against utils.
    for i in range(1, MAX_POWER + 1):
        clf = KNeighborsClassifier(n_neighbors=2**i, n_jobs=1)
        clf = CalibratedClassifierCV(clf, method='isotonic', cv=kf)
        utils.VJUH(X_train,
                   X_test,
                   y_train,
                   clf,
                   MODEL_NAME + str(i),
                   'ensemble',
                   stratify_labels=utils.load_stratify_labels())

    # Load the per-model train/test tables back from the 'ensemble' folder;
    # the third value returned by load_data is ignored.
    train = []
    test = []
    for i in range(1, MAX_POWER + 1):
        tr, ts, _ = utils.load_data(data_name=MODEL_NAME + str(i),
                                    data_folder='ensemble')
        train.append(tr)
        test.append(ts)
    # Place the per-model train tables side by side; ignore_index=True
    # renumbers the resulting column labels.
    result = pd.concat(train, axis=1, ignore_index=True)
    # MODEL_NAME[:-1] drops the last character of MODEL_NAME.
    # NOTE(review): presumably a trailing separator -- confirm.
    utils.save_result(result.values,
                      MODEL_NAME[:-1],
                      data_folder='ensemble',
                      test=False)
    # Same column-wise concatenation for the per-model test tables.
    result = pd.concat(test, axis=1, ignore_index=True)
    utils.save_result(result.values,
Ejemplo n.º 3
0
    import warnings

    # Silence DeprecationWarning messages for the remainder of the run.
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    X_train, X_test, y_train = utils.load_data('log_flipped')

    # The scaler is fitted on the train and test rows stacked together, so
    # its statistics cover both sets; each set is then transformed and
    # wrapped back into a DataFrame with its original column names.
    scaler = StandardScaler()
    scaler.fit(pd.concat((X_train, X_test), axis=0, ignore_index=True))
    X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Encode the target labels as int32 codes; the number of distinct
    # classes sizes the network output.
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train).astype(np.int32)
    num_classes = len(encoder.classes_)
    # Convert the features to float32 arrays; the number of columns sizes
    # the network input.
    X_train = np.array(X_train).astype(np.float32)
    X_test = np.array(X_test).astype(np.float32)
    num_features = X_train.shape[1]

    # Input and output sizes come from the data; every other setting comes
    # from PARAMS (defined outside this view).
    clf = NeuralNet(input_shape=(None, num_features), output_num_units=num_classes, **PARAMS)

    # MODE selects what is done with the network; any value other than 'cv'
    # or 'ensemble' only prints a message.
    if MODE == 'cv':
        utils.perform_cv(X_train, y_train, clf, MODEL_NAME, stratify_labels=utils.load_stratify_labels())
    elif MODE == 'ensemble':
        # NOTE(review): the meaning of `lasagne=True` and `runs=RUNS` is
        # defined by utils.VJUH, which is not visible here -- confirm there.
        utils.VJUH(X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
                   lasagne=True,
                   runs=RUNS,
                   stratify_labels=utils.load_stratify_labels()
                   )
    else:
        print('Unsupported mode')