def get_fnames(model_name, target, time, drop_opt):
    resample_method = resample_dict[time][target][model_name]
    return join(
        path,
        f'perm_based_interaction_results_{model_name}_{target}_{time}'
        f'{drop_opt}{resample_method}.nc'
    )

myInterpreter = InterpretToolkit(examples=examples, targets=target_values)

results = []
for target in targets:
    fnames = [get_fnames(model_name, target, time, drop_opt)
              for model_name in ml_models]
    results.append(myInterpreter.load_results(fnames))

# Build readable names and plot colors for the predictors from a sample input file.
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

readable_feature_names = {feature: to_readable_names([feature])[0]
                          for feature in features}
feature_colors = {feature: to_readable_names([feature])[1]
                  for feature in features}
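# Sketch of the assumed resample_dict structure: a nested mapping of lead
# time -> target -> model name -> resample suffix, inferred from the lookup
# inside get_fnames above. The keys and values here are illustrative only.
resample_dict_example = {
    'first_hour': {
        'tornado': {
            'LogisticRegression': 'under',
            'RandomForest': '',
        },
    },
}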
#feature_names = ['lcl_ml_ens_mean_spatial_mean',
#                 'shear_u_0to1_ens_mean_spatial_mean']
feature_names = [
    'hailcast_time_max_ens_mean_of_90th',
    'w_up_time_max_ens_mean_of_90th',
    'uh_2to5_time_max_ens_mean_of_90th',
    'shear_v_0to6_ens_mean_spatial_mean',
    'cape_ml_ens_mean_spatial_mean',
    'temperature_700mb_ens_mean_spatial_mean',
    'major_axis_length',
    'divergence_10m_time_min_ens_mean_of_10th',
]

myInterpreter = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt) for m in model_names]
print(fnames)
results = myInterpreter.load_results(fnames=fnames)

data1 = results['w_up_time_max_ens_std_of_90th__LogisticRegression__ale']
data2 = results['temperature_700mb_ens_mean_spatial_mean__LogisticRegression__ale']

# 'ALE variance' summary: std. dev. of the ALE curve along axis 1,
# averaged over the remaining axis.
ale_std = np.mean(np.std(data2.values, ddof=1, axis=1))
print(f'700 mb Temp ALE variance: {ale_std:.5f}')

#########################################
resample_method = resample_dict[time][target][model_names[0]]
fnames = join(
    ale_path,
    f'ale_var_results_{model_names[0]}_{resample_method}_{target}_{time}{drop_opt}.nc'
)
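# Minimal helper sketch generalizing the computation above for any result
# stored under the '<feature>__<model>__ale' key convention. The axis layout
# (curve bins on axis 1) is an assumption carried over from the call above.
def ale_curve_std(results, feature, model='LogisticRegression'):
    curve = results[f'{feature}__{model}__ale'].values
    return np.mean(np.std(curve, ddof=1, axis=1))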
parameters['model_name'] = model_name
calibrated_pipeline = _load_model(**parameters)['model']

model_names = [model_name]  # [model_name+'_under'] if resample == 'under'

myInterpreter = InterpretToolkit(
    models=calibrated_pipeline,
    model_names=model_names,
)

fnames = join(
    ale_path,
    f'pd_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
)
pd_1d = myInterpreter.load_results(fnames=fnames)

fnames = join(
    ale_path,
    f'pd_2d_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
)
pd_2d = myInterpreter.load_results(fnames=fnames)

# Load the permutation importance results from the saved pickle file.
with open(f'IMPORTANT_FEATURES_ALL_MODELS_{target}_{time}.pkl', 'rb') as pkl_file:
    important_vars = pickle.load(pkl_file)

# All unique pairs of important features for the Friedman H-statistic.
features = list(itertools.combinations(important_vars, r=2))
results = myInterpreter.calc_friedman_h_stat(features=features,
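# For reference, itertools.combinations yields each unordered pair exactly
# once, so n important features produce n*(n-1)/2 candidate interactions:
#   list(itertools.combinations(['cape', 'shear', 'uh'], r=2))
#   -> [('cape', 'shear'), ('cape', 'uh'), ('shear', 'uh')]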
feature_names = get_top_features(model_name, target, time, 'training',
                                 'norm_aupdc', drop_opt, resample='')
display_feature_names = {f: to_readable_names([f])[0] for f in feature_names}
display_feature_names = _fix_long_names(display_feature_names)
feature_units = {f: get_units(f) for f in feature_names}
unnormalize_func = None

myInterpreter = InterpretToolkit()
fnames = get_fnames(model_name, target, time, resample_method, drop_opt, calibrate)
myInterpreter.load_results(fnames=fnames)

ice_fnames = get_ice_fnames(model_name, target, time, resample_method,
                            drop_opt, calibrate)
ice_dict = myInterpreter.load_results(fnames=ice_fnames)

fig, axes = myInterpreter.plot_ale(
    features=feature_names[:10],
    display_feature_names=display_feature_names,
    display_units=feature_units,
    title=f'{plt_config.title_dict[target]} {time.replace("_", " ").title()}',
    unnormalize=unnormalize_func,
    ice_curves=ice_dict,
)

fname = f'ale_{model_name}_{target}_{time}_{drop_opt}_{resample_method}{calibrate}.png'
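# Sketch of the (elided) save step: only the output filename is constructed
# above, so persisting the figure with matplotlib is an assumption here.
fig.savefig(fname, dpi=300, bbox_inches='tight')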
########################################
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']
ds.close()

display_feature_names = {f: to_readable_names([f])[0] for f in features}
display_feature_names = _fix_long_names(display_feature_names)
###feature_units = {f: get_units(f) for f in features}

myInterpreter = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt) for m in model_names]
results = myInterpreter.load_results(fnames=fnames)

# Keep the top three feature pairs ranked by ALE variance of interactions.
feature_names = results['ale_variance_interactions_rankings__LogisticRegression'].values
feature_names = feature_names[:3]

fnames = [get_2d_ale(m, target, time, drop_opt) for m in model_names]
ale_data = myInterpreter.load_results(fnames=fnames)

# Rankings are stored as 'feature1__feature2' strings; split back into tuples.
feature_names = [tuple(f.split('__')) for f in feature_names]
print(feature_names)

fig, axes = myInterpreter.plot_ale(
    ale_data=ale_data,
    features=feature_names,
    display_feature_names=display_feature_names,
def get_fnames(model_name, target, time, mode, metric, drop_opt, resample):
    return join(
        path,
        f'permutation_importance_{model_name}_{target}_{time}_{mode}_{metric}'
        f'{drop_opt}{resample}.nc'
    )

ylabels = ['Severe Wind']

myInterpreter = InterpretToolkit()
results = []
fnames = [get_fnames('RandomForest', 'severe_wind', time, mode, metric, drop_opt, r)
          for r in ['under', '']]
myInterpreter.load_results(fnames)

fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

readable_feature_names = {feature: to_readable_names([feature])[0]
                          for feature in features}
feature_colors = {feature: to_readable_names([feature])[1]
                  for feature in features}
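# Note: to_readable_names is assumed (from the [0]/[1] indexing above) to
# return, per raw variable, a pair of (display name, plot color).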
combos = pipeline_set.pipeline_set
drop_opt = ''
imputer_method = 'simple'
ale_path = '/work/mflora/ML_DATA/ALE_RESULTS'

start_time = datetime.datetime.now()
for combo in combos:
    model_name, target, resample_method, normalize_method, time = combo
    results_fname = join(
        ale_path,
        f'ale_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )

    myInterpreter = InterpretToolkit()
    myInterpreter.load_results(results_fname)
    results = myInterpreter.calc_ale_variance(model_names=model_name)

    save_fname = join(
        ale_path,
        f'ale_var_results_{model_name.replace("_under", "")}_'
        f'{resample_method}_{target}_{time}{drop_opt}.nc'
    )
    print(f'Saving {save_fname}...')
    myInterpreter.save_results(fname=save_fname, data=results)

duration = datetime.datetime.now() - start_time
seconds = duration.total_seconds()
hours = seconds // 3600
minutes = (seconds % 3600) // 60
seconds = seconds % 60
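# Sketch of the (elided) final report: the fragment computes the elapsed
# wall-clock components above but ends before printing them.
print(f'Elapsed time: {int(hours)}h {int(minutes)}m {seconds:.1f}s')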