# Build the classifier selected by CLASSIFIER, together with the name of the
# evaluation metric that is forwarded to it through fit_params.
if CLASSIFIER == 'xgb':
    metric = 'logloss'
    clf = xgb.XGBClassifier(**PARAMS)
else:
    metric = 'binary_logloss'
    # Work on a copy so PARAMS itself is left untouched: derive num_leaves
    # from max_depth, then drop the two keys that are not passed on.
    par = PARAMS.copy()
    par['num_leaves'] = 2 ** par.pop('max_depth')
    par.pop('gamma')
    clf = lgb.LGBMClassifier(**par)

# Run the requested workflow; anything other than 'cv' / 'out' is reported.
if MODE not in ('cv', 'out'):
    print('Unsupported mode')
elif MODE == 'cv':
    utils.perform_cv(
        X_train,
        y_train,
        clf,
        MODEL_NAME + '-' + CLASSIFIER,
        fit_params=dict(eval_metric=metric),
        stratify_labels=utils.load_stratify_labels(),
    )
else:
    utils.VJUH(
        X_train,
        X_test,
        y_train,
        clf,
        MODEL_NAME,
        'out',
        fit_params=dict(eval_metric=metric),
        seed_name='seed',
        runs=RUNS,
        stratify_labels=utils.load_stratify_labels(),
    )
def predict_proba(self, X):
    """Return class probabilities averaged over all ``num_runs`` model pairs.

    Rows are routed by the masks produced by ``_split_data``: rows in the
    first group are scored by ``models1`` on the ``x1_columns`` view, rows in
    the second group by ``models2`` on the ``x2_columns`` view.

    NOTE(review): a row that falls in neither mask (e.g. a NaN
    ``upd_start_level``) keeps only the first ``models1`` prediction, which
    is still divided by ``num_runs`` -- confirm such rows cannot occur.
    Assumes the models return arrays that support boolean-mask assignment.
    """
    X1, X2, idx1, idx2 = self._split_data(X)

    # Score every row with the first group-1 model to obtain an output array
    # of the right shape, then overwrite the rows of the second group.
    preds = self.models1[0].predict_proba(X1)
    preds[idx2] = self.models2[0].predict_proba(X2.loc[idx2])

    # Add the remaining runs, each group scored by its own models.
    for run in range(1, self.num_runs):
        preds[idx1] += self.models1[run].predict_proba(X1.loc[idx1])
        preds[idx2] += self.models2[run].predict_proba(X2.loc[idx2])

    return preds / self.num_runs


def _split_data(self, X):
    """Split ``X`` into the two feature views and the two row masks.

    Returns ``(X1, X2, idx_1, idx_2)`` where ``X1`` / ``X2`` hold the
    ``x1_columns`` / ``x2_columns`` of every row, ``idx_1`` is a boolean
    array marking rows with ``upd_start_level <= START_LEVEL_THRESHOLD`` and
    ``idx_2`` marks rows strictly above the threshold.
    """
    start_level = X['upd_start_level']
    idx_1 = (start_level <= START_LEVEL_THRESHOLD).values
    idx_2 = (start_level > START_LEVEL_THRESHOLD).values
    X1 = X.loc[:, self.x1_columns]
    X2 = X.loc[:, self.x2_columns]
    return X1, X2, idx_1, idx_2


if __name__ == '__main__':
    # Fixed seed for NumPy's global random state.
    np.random.seed(2707)

    X_train, X_test, y_train = utils.load_data(data_name='extended')
    clf = XGBoostSplit(num_runs=NUM_RUNS, base=CLASSIFIER)

    if MODE == 'cv':
        utils.perform_cv(
            X_train,
            y_train,
            clf,
            MODEL_NAME + '-' + CLASSIFIER,
            stratify_labels=utils.load_stratify_labels(),
        )
    elif MODE == 'ensemble':
        utils.VJUH(
            X_train,
            X_test,
            y_train,
            clf,
            MODEL_NAME,
            'ensemble',
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        # Message fixed: the original contained a stray '#'.
        print('Unsupported mode')
import warnings

# Suppress DeprecationWarning for everything that runs after this point.
warnings.filterwarnings("ignore", category=DeprecationWarning)

X_train, X_test, y_train = utils.load_data('log_flipped')

# Fit the scaler on train and test rows stacked together, then standardise
# each frame on its own while keeping its column labels.
scaler = StandardScaler()
scaler.fit(pd.concat((X_train, X_test), axis=0, ignore_index=True))
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# Encode the targets as int32 codes; the number of distinct classes sizes the
# network's output layer.
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train).astype(np.int32)
num_classes = len(encoder.classes_)

# The network is fed plain float32 arrays rather than DataFrames.
X_train = np.array(X_train).astype(np.float32)
X_test = np.array(X_test).astype(np.float32)
num_features = X_train.shape[1]

clf = NeuralNet(
    input_shape=(None, num_features),
    output_num_units=num_classes,
    **PARAMS
)

# Run the requested workflow; anything other than 'cv' / 'ensemble' is reported.
if MODE not in ('cv', 'ensemble'):
    print('Unsupported mode')
elif MODE == 'cv':
    utils.perform_cv(
        X_train,
        y_train,
        clf,
        MODEL_NAME,
        stratify_labels=utils.load_stratify_labels(),
    )
else:
    utils.VJUH(
        X_train,
        X_test,
        y_train,
        clf,
        MODEL_NAME,
        'ensemble',
        lasagne=True,
        runs=RUNS,
        stratify_labels=utils.load_stratify_labels(),
    )