Beispiel #1
0
# For every (time, target) combination: load the training data and fitted
# models, read the precomputed ALE curves, compute the interaction-strength
# (IAS) score with bootstrapping, and save the result back to disk.
for time, target in itertools.product(time_set, target_set):
    parameters = {
        'time': time,
        'target': target,
        'drop_opt': drop_opt,
    }
    X, y = _load_train_data(**parameters)
    estimators = load_models(time, target, drop_opt, model_names)
    explainer = InterpretToolkit(
        estimators=estimators,
        estimator_names=model_names,
        X=X,
        y=y,
    )

    # e.g. ale_results_all_models_tornado_first_hourL1_based_feature_selection_aggressive.nc
    ale_fname = join(ale_path,
                     f'ale_results_all_models_{target}_{time}{drop_opt}.nc')
    ale = explainer.load(fnames=ale_fname)
    results = explainer.interaction_strength(ale,
                                             n_bootstrap=10,
                                             subsample=0.1)
    print(results)

    out_fname = join(ale_path,
                     f'ias_score_all_models_{target}_{time}{drop_opt}.nc')
    explainer.save(fname=out_fname, data=results)
        'shear_v_0to1_ens_mean_spatial_mean',
        'hailcast_time_max_ens_mean_of_90th',
        'major_axis_length',
        'uh_2to5_time_max_ens_mean_of_90th',
        'cin_ml_ens_std_spatial_mean',
        'minor_axis_length',
        'shear_v_0to6_ens_mean_spatial_mean',
        'lcl_ml_ens_mean_spatial_mean',
        'w_up_time_max_ens_mean_of_90th']
    '''
    data['X']['mid_level_lapse_rate_ens_mean_spatial_mean'] = data['X']['mid_level_lapse_rate_ens_mean_spatial_mean'] / 2.67765 
    X['mid_level_lapse_rate_ens_mean_spatial_mean'] = X['mid_level_lapse_rate_ens_mean_spatial_mean'] / 2.67765

    fname = join(perm_path, f'permutation_importance_all_models_{target}_{time}_training_{metric}{drop_opt}{perm_method}.nc')
    explainer = InterpretToolkit(X=data['X'],y=data['targets'],estimator_output='probability',) 
    perm_results = explainer.load(fname)
    #important_vars = perm_results['multipass_rankings__LogisticRegression'].values[:12]
    #important_vars = ['low_level_lapse_rate_ens_mean_spatial_mean']

    important_vars = ['mid_level_lapse_rate_ens_mean_spatial_mean']
    all_vars = perm_results['singlepass_rankings__LogisticRegression'].values

    display_feature_names = {f: to_readable_names([f])[0] for f in all_vars} 
    #display_feature_names = _fix_long_names(display_feature_names)
    feature_units = {f: get_units(f)for f in all_vars}

    if option == 'interaction':
        interaction_index = 'auto'
        y = None
    elif option == 'targets':
        interaction_index=None
Beispiel #3
0
'lcl_ml_ens_mean_spatial_mean',
'major_axis_length',
'cape_ml_ens_mean_spatial_mean',
'geopotential_height_500mb_ens_mean_spatial_mean',
'minor_axis_length',
'wz_0to2_time_max_ens_mean_of_90th',
'bouyancy_time_min_ens_mean_spatial_mean',
'shear_v_0to1_ens_mean_spatial_mean',
'uh_0to2_time_max_ens_std_spatial_mean']
########################################
print('First load of the data...')

# Map each raw feature name to a plot-ready label and its physical units.
display_feature_names = {
    name: to_readable_names([name])[0] for name in feature_names
}
feature_units = {name: get_units(name) for name in feature_names}

explainer = InterpretToolkit()
fnames = get_fnames(target, time, drop_opt)
data = explainer.load(fnames=fnames)

# Panel plot of the ALE curves for every feature.
plot_title = f'{plt_config.title_dict[target]} {time.replace("_", " ").title()}'
fig, axes = explainer.plot_ale(
    data,
    features=feature_names,
    display_feature_names=display_feature_names,
    display_units=feature_units,
    title=plot_title,
    hspace=.75,
)

fname = f'ale_{target}_{time}_{drop_opt}.png'
base_plot.save_figure(fig=fig, fname=fname)
Beispiel #4
0
        ax=ax,
        data=df,
        x=var,
        hue=target,
        legend=False,
    )


# Load the ranked permutation-importance results and keep the top n_vars
# features for the logistic-regression model.
path = '/work/mflora/ML_DATA/permutation_importance'
perm_imp_fname = join(
    path,
    f'permutation_importance_all_models_{target}_{time}_training_{metric}{drop_opt}{perm_method}.nc'
)

explainer = InterpretToolkit()
perm_imp_results = explainer.load(perm_imp_fname)
ranking_key = f'{mode}_rankings__LogisticRegression'
important_vars = perm_imp_results[ranking_key].values[:n_vars]

# Human-readable feature labels, annotated with their units, for display.
readable_feature_names = {
    feature: to_readable_names([feature])[0] + f' ({get_units(feature)})'
    for feature in important_vars
}

parameters = {
    'time': time,
    'target': target,
    'drop_opt': drop_opt,
    p_values = []
    for n in range(n_vars):
        p_value = permutation_test(multipass_scores[n,:],
                           scores_to_compare_against[n,:],
                           method='approximate',
                           num_rounds=1000,
                           seed=0)
        p_values.append(p_value)
        if p_value > 0.05:
            print('Probably the same distribution\n')
        else:
            print('Probably different distributions\n')
    p_values = np.array(p_values)>0.05
    return p_values

def get_fnames(target, time, mode, metric, drop_opt, perm_method, resample=''):
    """Build the full path to a permutation-importance netCDF file.

    Relies on the module-level ``path`` (output directory) and ``atype``
    (analysis type) variables being defined elsewhere in this script.
    """
    basename = (
        f'permutation_importance_{atype}_{target}_{time}_'
        f'{mode}_{metric}{drop_opt}{perm_method}{resample}.nc'
    )
    return join(path, basename)

# Load the permutation-importance results for every target, then test whether
# the top-10 rankings of the first result differ significantly.
explainer = InterpretToolkit()

results = []
for target in targets:
    perm_fname = get_fnames(target, time, mode, metric, drop_opt,
                            perm_method, resample)
    results.append(explainer.load(perm_fname))

p_values = get_p_values(results[0], ml_models[0], n_vars=10)

for (target, time, mode, metric, perm_method)


# Experiment configuration.
model_names = ['LogisticRegression']
# NOTE(review): `mode` is set elsewhere; when it is None the hail target is
# used, otherwise tornado — verify against the caller.
target = 'severe_hail' if mode is None else 'tornado'
time = 'first_hour'
# Feature-selection variant tag appended to output filenames.
drop_opt = 'L1_based_feature_selection_with_manual'
# Performance-diagram categories to extract.
perf_keys = ["Best Hits",
             "Worst False Alarms", 
             "Worst Misses",
            ]
# Evaluation metric and permutation direction used in result filenames.
metric = 'norm_aupdc'
perm_method = 'backward'

########################################
# Load the per-model results, build display names/units, then grab the
# top-12 multipass permutation-importance rankings.
explainer = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt, mode) for m in model_names]
dframe = explainer.load(fnames=fnames, dtype='dataframe')

feature_names = dframe.attrs['feature_names']
display_feature_names = {f: to_readable_names([f])[0] for f in feature_names}
feature_units = {f: get_units(f) for f in feature_names}

fname = join(perm_path, f'permutation_importance_all_models_{target}_{time}_training_{metric}{drop_opt}{perm_method}.nc')
perm_results = explainer.load(fname)
# BUG FIX: `.values` yields a NumPy array, which has no .remove() method, so
# the removal below raised AttributeError. Materialize as a list first.
important_vars = list(perm_results['multipass_rankings__LogisticRegression'].values[:12])

# Drop non-meteorological bookkeeping features from the ranking, if present.
if 'Initialization Time' in important_vars:
    important_vars.remove('Initialization Time')