# imports required by the plotting routines below; DataUtils and the helper
# functions (get_station_dict, plotPerStationPredictionRunWorker, the table
# generators, sampleTypeBackgroundColoring) are assumed to come from the
# surrounding module
import glob
import os
import pickle as pkl
from collections import defaultdict
from multiprocessing import Pool

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr


def plotPerStationPredictionRun(source_path, observation_path, n_parallel):
    # gather the error data of all models in the source folder, keyed by run
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            error_data_per_run_dict[data_var].append((model_name, ds[data_var]))

    # load observations
    OBS = xr.open_dataset(observation_path)

    # get the prediction lead time to adjust the time labels; taken from the
    # last loaded dataset, which is assumed to be representative of all models
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    for run, models in error_data_per_run_dict.items():
        stations = models[0][1].station.data
        inits = models[0][1].init.data
        init_type_mapping = np.array(models[0][1].init_type_mapping)
        train_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'train']
        test_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'test']
        sample_type_color_mapping = [mapping[1] for mapping in init_type_mapping]
        times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
        time_labels = [str(t)[:-13] for t in times]
        station_name_dict = get_station_dict(OBS, stations)

        model_station_mean_errors = {}
        # plot the prediction run results for each station in parallel
        with Pool(processes=n_parallel) as pool:
            process_results = []
            for station in stations:
                print('Plotting of prediction run for station %s queued.' % station)
                process_results.append(pool.apply_async(
                    plotPerStationPredictionRunWorker,
                    (models, station, train_indices, test_indices, station_name_dict,
                     sample_type_color_mapping, time_labels, source_path, run)))

            # aggregate results from all processes
            for ps_idx, ps_result in enumerate(process_results):
                # sync processes
                model_station_mean_error = ps_result.get()
                for experiment_title, station_data_list in model_station_mean_error.items():
                    try:
                        model_station_mean_errors[experiment_title] += station_data_list
                    except KeyError:
                        model_station_mean_errors[experiment_title] = station_data_list
                print('[Process %s] Synchronized after plotting station.' % ps_idx)

        run_path = source_path + '/plots/prediction_runs/%s' % run
        if not os.path.exists(run_path):
            os.makedirs(run_path)

        generateStationPredictionResultTable(output_path=run_path, results=model_station_mean_errors)
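# Illustrative sketch (not part of the original module): a minimal helper to
# inspect one of the pickled 'model_run_error.pkl' datasets gathered above.
# The helper name and the printed fields are assumptions, derived only from
# the attributes the plotting functions actually access.
def _inspectModelRunError(path):
    """Print the runs, dimensions and configured lead times of a pickled error dataset."""
    with open(path, 'rb') as file:
        ds = pkl.load(file)
    for data_var in ds.data_vars:
        da = ds[data_var]
        print('run %s: %d stations, %d inits' % (data_var, da.station.size, da.init.size))
    if 'config' in ds.attrs:
        print('prediction lead times: %s' % ds.attrs['config']['prediction_times'])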
def plotAveragedPredictionRun(source_path):
    # gather the error data of all models in the source folder, keyed by run
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            inits = ds[data_var].init.data
            sample_type_mapping = [mapping[1] for mapping in ds[data_var].init_type_mapping]
            prediction_data = ds[data_var].data
            error_data_per_run_dict[data_var].append(
                (model_name, inits, prediction_data, sample_type_mapping))

    # get the prediction lead time to adjust the time labels; taken from the
    # last loaded dataset, which is assumed to be representative of all models
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1
    times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
    time_labels = [str(t)[:-13] for t in times]

    for run, model_error_data_list in error_data_per_run_dict.items():
        model_mean_errors = {}
        n_subplots = 10
        fig, axes = plt.subplots(n_subplots, figsize=(60, 20), sharey=True)

        for model_idx, model_error_data in enumerate(model_error_data_list):
            experiment_title = model_error_data[0]
            N = len(model_error_data[1])
            prediction_data = model_error_data[2]
            init_type_mapping = model_error_data[3]
            split_length = N // n_subplots
            ind = np.arange(N)  # the x locations for the groups

            train_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'train']
            test_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'test']
            # note: 'filterd' matches the label spelling stored in init_type_mapping
            filtered_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'filterd']

            for i in range(n_subplots):
                # split indices into slices for each subplot
                index_split = ind[i * split_length:(i + 1) * split_length]
                if model_idx == 0:
                    sampleTypeBackgroundColoring(axes[i], index_split,
                                                 init_type_mapping[i * split_length:(i + 1) * split_length])
                axes[i].set_xlim([np.min(index_split), np.max(index_split)])
                axes[i].plot(index_split,
                             np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 0], axis=1),
                             label=experiment_title, linewidth=0.15, alpha=0.8)

            # mean error metrics of the model over the error values (data column 3)
            train_model_bias = np.nanmean(prediction_data[train_indices][:, :, 3])
            train_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[train_indices][:, :, 3])))
            train_model_mae = np.nanmean(np.absolute(prediction_data[train_indices][:, :, 3]))

            test_model_bias = np.nanmean(prediction_data[test_indices][:, :, 3])
            test_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[test_indices][:, :, 3])))
            test_model_mae = np.nanmean(np.absolute(prediction_data[test_indices][:, :, 3]))

            filtered_model_bias = np.nanmean(prediction_data[filtered_indices][:, :, 3])
            filtered_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[filtered_indices][:, :, 3])))
            filtered_model_mae = np.nanmean(np.absolute(prediction_data[filtered_indices][:, :, 3]))

            model_mean_errors[experiment_title] = (train_model_bias, train_model_rmse, train_model_mae,
                                                   test_model_bias, test_model_rmse, test_model_mae,
                                                   filtered_model_bias, filtered_model_rmse, filtered_model_mae)

        # add mean errors of cosmo output predictions (difference of data
        # columns 1 and 2, which are identical across models, so the last
        # model's prediction_data is used)
        train_diff_cosmo = prediction_data[train_indices][:, :, 1] - prediction_data[train_indices][:, :, 2]
        train_cosmo_bias = np.nanmean(train_diff_cosmo)
        train_cosmo_rmse = np.sqrt(np.nanmean(np.square(train_diff_cosmo)))
        train_cosmo_mae = np.nanmean(np.absolute(train_diff_cosmo))

        test_diff_cosmo = prediction_data[test_indices][:, :, 1] - prediction_data[test_indices][:, :, 2]
        test_cosmo_bias = np.nanmean(test_diff_cosmo)
        test_cosmo_rmse = np.sqrt(np.nanmean(np.square(test_diff_cosmo)))
        test_cosmo_mae = np.nanmean(np.absolute(test_diff_cosmo))

        filtered_diff_cosmo = prediction_data[filtered_indices][:, :, 1] - prediction_data[filtered_indices][:, :, 2]
        filtered_cosmo_bias = np.nanmean(filtered_diff_cosmo)
        filtered_cosmo_rmse = np.sqrt(np.nanmean(np.square(filtered_diff_cosmo)))
        filtered_cosmo_mae = np.nanmean(np.absolute(filtered_diff_cosmo))

        # add COSMO-1 output prediction error
        model_mean_errors['COSMO-1'] = (train_cosmo_bias, train_cosmo_rmse, train_cosmo_mae,
                                        test_cosmo_bias, test_cosmo_rmse, test_cosmo_mae,
                                        filtered_cosmo_bias, filtered_cosmo_rmse, filtered_cosmo_mae)

        for i in range(n_subplots):
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 1], axis=1),
                         label='COSMO-1', linewidth=0.15, alpha=0.8, color='b', linestyle='-.')
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 2], axis=1),
                         label='Prediction', linewidth=0.15, alpha=0.8, color='m', linestyle='--')

            tick_step_size = np.maximum(split_length // 30, 1)
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticklabels(time_labels[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length], minor=True)

            # and a corresponding grid, with different settings for minor and major lines
            axes[i].grid(which='both')
            axes[i].grid(which='minor', alpha=0.2)
            axes[i].grid(which='major', alpha=0.5)

        handles, labels = axes[0].get_legend_handles_labels()
        axes[n_subplots - 1].set_xlabel('Time')
        axes[0].legend(handles, labels)
        plt.tight_layout()

        run_path = source_path + '/plots/prediction_runs/%s' % run
        if not os.path.exists(run_path):
            os.makedirs(run_path)

        fig.savefig(run_path + '/averaged_prediction.png', dpi=300)
        generatePredictionResultTable(output_path=run_path, results=model_mean_errors)
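# Usage sketch, not part of the original module. Assumptions: the experiment
# output lives under 'output/', the observation dataset is 'data/observations.nc',
# and 8 worker processes are available; adjust paths and parallelism as needed.
if __name__ == '__main__':
    plotAveragedPredictionRun(source_path='output')
    plotPerStationPredictionRun(source_path='output',
                                observation_path='data/observations.nc',
                                n_parallel=8)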