model = calibrated_pipeline.calibrated_classifiers_[0].base_estimator

        path = '/work/mflora/ML_DATA/permutation_importance/'
        fnames = [
            join(
                path,
                f'permutation_importance_{model_name}_{target}_{time}_training_norm_aupdc{drop_opt}.pkl'
            )
        ]
        perm_imp_results = load_pickle(fnames)
        myInterpreter = InterpretToolkit(model=[None])
        myInterpreter.set_results(perm_imp_results,
                                  option='permutation_importance')
        important_vars = myInterpreter.get_important_vars(
            perm_imp_results,
            multipass=True,
            combine=False,
        )[model_name][:9]

        important_vars = ['srh_0to1_ens_mean_spatial_mean']

        examples_transformed, target_values_transformed = just_transforms(
            model, examples, target_values)
        myInterpreter = InterpretToolkit(model=[model.steps[-1][1]],
                                         model_names=[model_name],
                                         examples=examples_transformed,
                                         targets=target_values_transformed,
                                         feature_names=feature_names)

        njobs = len(important_vars)
Example #2
0
import itertools

# Hazards and lead-time windows to process; one output pickle is written
# for every (target, time) combination.
targets = ['tornado', 'severe_wind', 'severe_hail']
times = ['first_hour', 'second_hour']
# Suffix embedded in both the input and output file names.
# Alternative previously used here: '_drop_high_corr_pred'
drop_opt = '_manual_drop_time_max_spatial_mean'
model_set = ['RandomForest', 'XGBoost', 'LogisticRegression']

# Directory holding the saved permutation-importance results; it does not
# depend on the loop variables, so it is defined once up front.
path = '/work/mflora/ML_DATA/permutation_importance/'

for target, time in itertools.product(targets, times):
    # One input file per model in model_set for this target/time pair.
    fnames = [
        join(path, f'permutation_importance_{model_name}_{target}_{time}_training_norm_aupdc{drop_opt}.pkl')
        for model_name in model_set
    ]
    perm_imp_results = load_pickle(fnames)

    # The toolkit is built with a placeholder model ([None]); the loaded
    # results are attached through set_results() before querying it.
    myInterpreter = InterpretToolkit(model=[None])
    myInterpreter.set_results(perm_imp_results, option='permutation_importance')
    important_vars = myInterpreter.get_important_vars(
        perm_imp_results,
        multipass=True,
        combine=True,
        nvars=9,
    )

    # NOTE: drop_opt already begins with '_', so the resulting name contains
    # a double underscore. The name is reproduced unchanged because other
    # code may load the file by this exact name. The file is written
    # relative to the current working directory.
    fname = f'important_vars_all_models_{target}_{time}_{drop_opt}.pkl'
    with open(fname, 'wb') as pkl_file:
        pickle.dump(important_vars, pkl_file)