# ---------------------------------------------------------------------------
# Per-model setup: rank predictors by permutation importance, then build an
# InterpretToolkit around the final (post-transform) estimator.
#
# NOTE(review): relies on `calibrated_pipeline`, `model_name`, `target`,
# `time`, `drop_opt`, `examples`, `target_values`, `feature_names`,
# `load_pickle`, `just_transforms`, and `InterpretToolkit` being defined
# earlier in the file/notebook -- confirm when running standalone.
# ---------------------------------------------------------------------------

# Unwrap the underlying estimator from the CalibratedClassifierCV wrapper.
model = calibrated_pipeline.calibrated_classifiers_[0].base_estimator

path = '/work/mflora/ML_DATA/permutation_importance/'
fnames = [
    join(
        path,
        f'permutation_importance_{model_name}_{target}_{time}_training_norm_aupdc{drop_opt}.pkl'
    )
]
perm_imp_results = load_pickle(fnames)

myInterpreter = InterpretToolkit(model=[None])
myInterpreter.set_results(perm_imp_results, option='permutation_importance')

# Top-9 multipass-important predictors for this single model.
important_vars = myInterpreter.get_important_vars(
    perm_imp_results,
    multipass=True,
    combine=False,
)[model_name][:9]

# BUG FIX: the original code immediately clobbered the computed ranking with
# a hard-coded single variable (almost certainly leftover debug code):
#     important_vars = ['srh_0to1_ens_mean_spatial_mean']
# That dead store made the permutation-importance load above pointless and
# forced njobs to 1. The override is removed so the computed top-9 list is
# actually used.

# Transform the examples into the model's feature space (presumably applies
# only the pipeline's preprocessing steps, no prediction -- verify against
# the just_transforms definition).
examples_transformed, target_values_transformed = just_transforms(
    model, examples, target_values)

# Interpret the bare estimator (last pipeline step) on the transformed data.
myInterpreter = InterpretToolkit(model=[model.steps[-1][1]],
                                 model_names=[model_name],
                                 examples=examples_transformed,
                                 targets=target_values_transformed,
                                 feature_names=feature_names)

# One worker per variable to be processed.
njobs = len(important_vars)
import itertools

# Aggregate permutation-importance rankings across all models, for every
# (hazard, lead-time) combination, and pickle the combined top-9 variables.
targets = ['tornado', 'severe_wind', 'severe_hail']
times = ['first_hour', 'second_hour']
drop_opt = '_manual_drop_time_max_spatial_mean'  # '_drop_high_corr_pred'
model_set = ['RandomForest', 'XGBoost', 'LogisticRegression']

for target, time in itertools.product(targets, times):
    # One permutation-importance results file per model in the set.
    path = '/work/mflora/ML_DATA/permutation_importance/'
    fnames = [
        join(
            path,
            f'permutation_importance_{model_name}_{target}_{time}_training_norm_aupdc{drop_opt}.pkl'
        )
        for model_name in model_set
    ]
    perm_imp_results = load_pickle(fnames)

    myInterpreter = InterpretToolkit(model=[None])
    myInterpreter.set_results(perm_imp_results,
                              option='permutation_importance')

    # Combined multipass ranking across all models; keep the top 9 variables.
    important_vars = myInterpreter.get_important_vars(perm_imp_results,
                                                      multipass=True,
                                                      combine=True,
                                                      nvars=9)

    # Persist the combined ranking for this (target, time) pair.
    fname = f'important_vars_all_models_{target}_{time}_{drop_opt}.pkl'
    with open(fname, 'wb') as pkl_file:
        pickle.dump(important_vars, pkl_file)