# Prepare the hold-out frame, score the tuned model on the split produced by
# `adversial_train_test_split`, then refit on the full training data and write
# the predictions to a CSV file under `submits/`.
#
# NOTE(review): the indentation of this fragment was lost. The layout below
# assumes that both the NaN/inf clean-up and the category-to-codes loop belong
# to the `clf_name != 'FeaturePredictor'` branch -- confirm against the
# training-side preprocessing.
import os

test_df = as_category(test_df)
test_X = test_df.drop(['CONTACT_DATE', 'SNAP_DATE'], axis=1)

if clf_name != 'FeaturePredictor':
    # Resolve the categorical columns once; the clean-up below does not touch
    # them, so the list is still valid for the encoding loop.
    cat_cols = list(test_X.select_dtypes(include='category').columns)
    cols = [col for col in test_X.columns if col not in set(cat_cols)]

    # Non-categorical columns: NaN and +/-inf are all replaced with 0.
    test_X.loc[:, cols] = (
        test_X.loc[:, cols].fillna(0).replace([np.inf, -np.inf], 0)
    )

    # Replace each categorical column with its integer codes. Plain column
    # assignment is used instead of `.loc[:, c] = ...` because writing values
    # of a different dtype through `.loc` is deprecated in recent pandas and
    # its outcome depends on the pandas version.
    for c in cat_cols:
        test_X[c] = test_X[c].cat.codes

# Fit the best configuration on the training part of the split and measure
# ROC AUC on the held-out part (restricted to the `features` columns).
adv_train_x, adv_train_y, adv_test_x, adv_test_y = adversial_train_test_split(
    train_X.loc[:, features],
    train_y,
    test_X.loc[:, features],
    topK=1000,
)
bayes_cv_tuner._fit_best_model(adv_train_x, adv_train_y)
adv_pred_y = bayes_cv_tuner.predict_proba(adv_test_x)[:, 1]
adv_auc = roc_auc_score(adv_test_y, adv_pred_y)
print(f'Adversial AUC = {adv_auc} by {len(adv_test_y)} samples')

# Refit on the full training data and predict the test set.
# NOTE(review): this fit uses all columns of `train_X`, while the check above
# used only `features` -- confirm that is intended.
bayes_cv_tuner._fit_best_model(train_X, train_y)
test_y = bayes_cv_tuner.predict_proba(test_X)

# Only column 1 of the probability matrix is exported (presumably the
# positive-class probability -- verify against the estimator's `classes_`).
df = pd.DataFrame(test_y[:, 1])

# Make sure the output directory exists so the predictions are not lost to a
# missing-directory error after the refit.
os.makedirs("submits", exist_ok=True)
df.to_csv(f"submits/"
          f"{best_estimator.__class__.__name__}"
          f"_{datetime.now().strftime('%d_%H_%M')}"
          f"_{bayes_cv_tuner.best_score_:0.4f}"
          f"_{adv_auc:0.4f}.csv",
          header=False,
          index=False)
# Bayesian hyper-parameter search over `pipe` using a single train/validation
# split of the row indices, followed by a refit of the best configuration on
# all of (X, Y).

# One split only: `cv` receives a one-element list holding the pair returned
# by `train_test_split`.
holdout_split = train_test_split(range(len(X)), train_size=0.666)
cv_folds = [holdout_split]

# Search ranges for the parameters of the pipeline's `model` step.
search_spaces = {
    'model__latent_dim': (2, 20),
    'model__intermediate_dim': (8, 128),
    'model__epochs': (8, 128),
    'model__D': (1e-3, 1e+3, 'log-uniform'),
    'model__lr': (1e-4, 1e-2, 'log-uniform'),
}

model = BayesSearchCV(
    estimator=pipe,
    search_spaces=search_spaces,
    n_iter=32,
    cv=cv_folds,
    refit=False,
    error_score=-1.0,
)


def _report_step(step_arg):
    """Print the step argument, the total iteration count and the best score."""
    print((step_arg, model.total_iterations(), model.best_score_))


# NOTE(review): assumes the installed BayesSearchCV looks up an `on_step`
# attribute; newer scikit-optimize releases take callbacks through
# `fit(..., callback=...)` -- confirm for the pinned version.
model.on_step = _report_step

model.fit(X, Y)

# The search ran with refit=False, so the best configuration is fitted on the
# full data explicitly afterwards via the private helper.
model.refit = True
model._fit_best_model(X, Y)

print(model.best_params_)
# Score is computed on the same (X, Y) the model was just fitted on.
print(model.score(X, Y))

""" model = pipe model.set_params(**{'model__D': 5.1964624423233898, 'model__lr': 0.00010138257365940301, 'model__epochs': 26, 'model__intermediate_dim': 125, 'model__latent_dim': 2}) model.fit(X, Y) print(model.predict(X, Y)) """