Example #1
from os.path import join


def get_fnames(model_name, target, time, drop_opt):
    # `path` and `resample_dict` are assumed to be defined earlier in the script.
    resample_method = resample_dict[time][target][model_name]
    return join(
        path,
        f'perm_based_interaction_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )
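The nested lookup above implies that `resample_dict` maps lead time to target to model name; a minimal hypothetical stand-in (keys invented for illustration, not from the source):

# Hypothetical structure: lead time -> target -> model name -> resample suffix.
resample_dict = {
    'first_hour': {
        'tornado': {'LogisticRegression': '_under', 'RandomForest': ''},
    },
}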


myInterpreter = InterpretToolkit(examples=examples, targets=target_values)
results = []
for target in targets:
    fnames = [
        get_fnames(model_name, target, time, drop_opt)
        for model_name in ml_models
    ]
    results.append(myInterpreter.load_results(fnames))

fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

# Build both mappings with a single call per feature.
readable_feature_names = {}
feature_colors = {}
for feature in features:
    name, color = to_readable_names([feature])[:2]
    readable_feature_names[feature] = name
    feature_colors[feature] = color
Example #2
#feature_names = ['lcl_ml_ens_mean_spatial_mean',
#       'shear_u_0to1_ens_mean_spatial_mean']

feature_names = [
    'hailcast_time_max_ens_mean_of_90th', 'w_up_time_max_ens_mean_of_90th',
    'uh_2to5_time_max_ens_mean_of_90th', 'shear_v_0to6_ens_mean_spatial_mean',
    'cape_ml_ens_mean_spatial_mean', 'temperature_700mb_ens_mean_spatial_mean',
    'major_axis_length', 'divergence_10m_time_min_ens_mean_of_10th'
]

myInterpreter = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt) for m in model_names]

print(fnames)

results = myInterpreter.load_results(fnames=fnames)

data1 = results['w_up_time_max_ens_std_of_90th__LogisticRegression__ale']
data2 = results[
    'temperature_700mb_ens_mean_spatial_mean__LogisticRegression__ale']

# Std. dev. across ALE bins (ddof=1), averaged over bootstrap replicates.
ale_var = np.mean(np.std(data2.values, ddof=1, axis=1))
print(f'700 mb Temp ALE variance: {ale_var:.5f}')
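The same reduction can rank every ALE curve in the loaded dataset; a minimal sketch, assuming `results` is an xarray.Dataset in which each `*__ale` variable has shape (n_bootstrap, n_bins):

# Sketch only: rank features by the average spread of their ALE curves.
ale_spread = {
    var: float(np.mean(np.std(results[var].values, ddof=1, axis=1)))
    for var in results.data_vars
    if var.endswith('__ale')
}
for var, spread in sorted(ale_spread.items(), key=lambda kv: kv[1], reverse=True):
    print(f'{var}: {spread:.5f}')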

#########################################
resample_method = resample_dict[time][target][model_names[0]]
fnames = join(
    ale_path,
    f'ale_var_results_{model_names[0]}_{resample_method}_{target}_{time}{drop_opt}.nc'
)
Example #3
    parameters['model_name'] = model_name
    calibrated_pipeline = _load_model(**parameters)['model']

    # Variant seen elsewhere: [model_name + '_under'] when resample == 'under'.
    model_names = [model_name]
    myInterpreter = InterpretToolkit(
        models=calibrated_pipeline,
        model_names=model_names,
    )

    fnames = join(
        ale_path,
        f'pd_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )
    pd_1d = myInterpreter.load_results(fnames=fnames)

    fnames = join(
        ale_path,
        f'pd_2d_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )
    pd_2d = myInterpreter.load_results(fnames=fnames)

    # Load the permutation-importance results from the saved pickle file.
    with open(f'IMPORTANT_FEATURES_ALL_MODELS_{target}_{time}.pkl',
              'rb') as pkl_file:
        important_vars = pickle.load(pkl_file)

    features = list(itertools.combinations(important_vars, r=2))
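    # itertools.combinations(important_vars, r=2) yields every unordered pair,
    # e.g. ['a', 'b', 'c'] -> [('a', 'b'), ('a', 'c'), ('b', 'c')]; the Friedman
    # H-statistic below is computed for each such feature pair.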

    results = myInterpreter.calc_friedman_h_stat(features=features)
Example #4
feature_names = get_top_features(model_name,
                                 target,
                                 time,
                                 'training',
                                 'norm_aupdc',
                                 drop_opt,
                                 resample='')
display_feature_names = {f: to_readable_names([f])[0] for f in feature_names}
display_feature_names = _fix_long_names(display_feature_names)
feature_units = {f: get_units(f) for f in feature_names}

unnormalize_func = None
myInterpreter = InterpretToolkit()
fnames = get_fnames(model_name, target, time, resample_method, drop_opt,
                    calibrate)
myInterpreter.load_results(fnames=fnames)

ice_fnames = get_ice_fnames(model_name, target, time, resample_method,
                            drop_opt, calibrate)
ice_dict = myInterpreter.load_results(fnames=ice_fnames)

fig, axes = myInterpreter.plot_ale(
    features=feature_names[:10],
    display_feature_names=display_feature_names,
    display_units=feature_units,
    title=f'{plt_config.title_dict[target]} {time.replace("_", " ").title()}',
    unnormalize=unnormalize_func,
    ice_curves=ice_dict,
)

fname = f'ale_{model_name}_{target}_{time}_{drop_opt}_{resample_method}{calibrate}.png'
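The PNG name is built but never written in this fragment; a minimal save step, assuming a matplotlib figure from plot_ale above and a hypothetical output directory `fig_path`:

# `fig_path` is hypothetical; `fig` is the figure returned by plot_ale.
fig.savefig(join(fig_path, fname), dpi=300, bbox_inches='tight')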
Example #5
########################################
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']
ds.close()

display_feature_names = {f: to_readable_names([f])[0] for f in features}
display_feature_names = _fix_long_names(display_feature_names)
# feature_units = {f: get_units(f) for f in features}

myInterpreter = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt) for m in model_names]

results = myInterpreter.load_results(fnames=fnames)
feature_names = results[
    'ale_variance_interactions_rankings__LogisticRegression'].values

feature_names = feature_names[:3]
fnames = [get_2d_ale(m, target, time, drop_opt) for m in model_names]
ale_data = myInterpreter.load_results(fnames=fnames)

feature_names = [tuple(f.split('__')) for f in feature_names]

print(feature_names)

fig, axes = myInterpreter.plot_ale(
    ale_data=ale_data,
    features=feature_names,
    display_feature_names=display_feature_names,
)


def get_fnames(model_name, target, time, mode, metric, drop_opt, resample):
    return join(
        path,
        f'permutation_importance_{model_name}_{target}_{time}_{mode}_{metric}{drop_opt}{resample}.nc'
    )


ylabels = ['Severe Wind']

myInterpreter = InterpretToolkit()
results = []

fnames = [
    get_fnames('RandomForest', 'severe_wind', time, mode, metric, drop_opt, r)
    for r in ['under', '']
]
myInterpreter.load_results(fnames)

fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

# Build both mappings with a single call per feature.
readable_feature_names = {}
feature_colors = {}
for feature in features:
    name, color = to_readable_names([feature])[:2]
    readable_feature_names[feature] = name
    feature_colors[feature] = color
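For context, `to_readable_names` evidently returns a (display name, plot color) pair per raw variable name; a hypothetical stand-in consistent with that usage (illustration only, the real mapping ships with the source package):

# Hypothetical stand-in for the source package's lookup.
def to_readable_names(features):
    name_map = {
        'cape_ml_ens_mean_spatial_mean': ('ML CAPE (spatial mean)', 'tab:red'),
    }
    # Fall back to the raw name and a neutral color for unknown variables.
    return name_map.get(features[0], (features[0], 'tab:gray'))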
combos = pipeline_set.pipeline_set
drop_opt = ''
imputer_method = 'simple'

ale_path = '/work/mflora/ML_DATA/ALE_RESULTS'

start_time = datetime.datetime.now()

for combo in combos:
    model_name, target, resample_method, normalize_method, time = combo
    results_fname = join(
        ale_path,
        f'ale_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )
    myInterpreter = InterpretToolkit()
    myInterpreter.load_results(results_fname)

    results = myInterpreter.calc_ale_variance(model_names=model_name)
    save_fname = join(
        ale_path,
        f'ale_var_results_{model_name.replace("_under", "")}_{resample_method}_{target}_{time}{drop_opt}.nc'
    )

    print(f'Saving {save_fname}...')
    myInterpreter.save_results(fname=save_fname, data=results)

duration = datetime.datetime.now() - start_time
seconds = duration.total_seconds()
hours = seconds // 3600
minutes = (seconds % 3600) // 60
seconds = seconds % 60
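The elapsed-time bookkeeping stops short of reporting; a one-line close under the obvious assumption that it feeds a console log (not in the original fragment):

print(f'Total runtime: {hours:.0f}h {minutes:.0f}m {seconds:.1f}s')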