# LightGBM baseline: tune hyperparameters with hyperopt on the CV folds,
# refit with the best parameters and evaluate on the MOST test set.
best_params, trials = optimize_lgbm_hyperopt(train_folds, feature_set,
                                             average_precision_score, seed,
                                             hyperopt_trials=args.lgbm_hyperopt_trials)
ap_score, models_best, oof_preds = fit_lgb(best_params, train_folds, feature_set,
                                           average_precision_score, True, True)
print('CV score:', feature_set, ap_score)

test_res = tools.eval_models(metadata_test, feature_set, models_best,
                             mean_std_best=None, impute=False, model_type='lgbm')

# Store the test-set predictions together with the knee identifiers and labels
features_suffix = '_'.join(feature_set)
y_test = metadata_test.Progressor.values.copy() > 0
ids = metadata_test.ID.values
sides = metadata_test.Side.values
results[f'preds_MOST_BL_all_{features_suffix}'] = (ids, sides, y_test, test_res)

with open(os.path.join(args.save_dir, 'results_baselines_lgbm.pkl'), 'wb') as f:
    pickle.dump(results, f)
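# Illustrative helper (an assumed usage sketch, not defined elsewhere in this script):
# it reloads the pickle written above, whose values are (ids, sides, y_test, test_res)
# tuples, and re-prints the test-set AUC / AP for every stored LightGBM baseline.
def print_saved_lgbm_results(path):
    with open(path, 'rb') as f:
        saved = pickle.load(f)
    for key, (_ids, _sides, y_true, y_pred) in saved.items():
        print(f'[{key}] Test AUC {roc_auc_score(y_true, y_pred):.5} | '
              f'AP {average_precision_score(y_true, y_pred):.5}')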
# Logistic regression baselines over different clinical feature sets:
# an unregularized statsmodels fit and a regularized sklearn fit for each set.
results = {}
for feature_set in [['AGE', 'SEX', 'BMI'],
                    ['KL'],
                    ['AGE', 'SEX', 'BMI', 'SURG', 'INJ', 'WOMAC'],
                    ['AGE', 'SEX', 'BMI', 'KL'],
                    ['AGE', 'SEX', 'BMI', 'KL', 'SURG', 'INJ', 'WOMAC']]:
    for regularize, model_type in zip([False, True], ['statsmodels', 'sklearn']):
        models_best, mean_std_best, folds_gt, folds_preds = baselines.build_logreg_model(
            train_folds, feature_set, seed, args.n_vals_c,
            average_precision_score, regularize)

        # Evaluate the fold models on the MOST test set
        test_res = tools.eval_models(metadata_test, feature_set, models_best,
                                     mean_std_best, model_type=model_type)

        features_suffix = '_'.join(feature_set)
        y_test = metadata_test.Progressor.values.copy() > 0
        ids = metadata_test.ID.values
        sides = metadata_test.Side.values

        print(f'[{model_type} | {feature_set}] CV AUC {roc_auc_score(folds_gt, folds_preds):.5} |'
              f' AP {average_precision_score(folds_gt, folds_preds):.5}')
        print(f'[{model_type} | {feature_set}] Test AUC {roc_auc_score(y_test, test_res):.5} |'
              f' AP {average_precision_score(y_test, test_res):.5}')