Example #1
import glob
import os
import pickle as pkl
from collections import defaultdict
from multiprocessing import Pool

import numpy as np
import xarray as xr

# Project-specific helpers (DataUtils, get_station_dict, plotPerStationPredictionRunWorker,
# generateStationPredictionResultTable) are assumed to be defined in the surrounding module.
def plotPerStationPredictionRun(source_path, observation_path, n_parallel):
    # gather all models in source folder
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            da = ds[data_var]
            error_data_per_run_dict[data_var].append((model_name, da))

    # load observations
    OBS = xr.open_dataset(observation_path)
    # get the prediction lead time to adjust time labels
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    for run, models in error_data_per_run_dict.items():
        stations = models[0][1].station.data
        inits = models[0][1].init.data
        init_type_mapping = np.array(models[0][1].init_type_mapping)
        train_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'train']
        test_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'test']
        sample_type_color_mapping = [mapping[1] for mapping in init_type_mapping]
        times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
        time_labels = [str(t)[:-13] for t in times]

        station_name_dict = get_station_dict(OBS, stations)

        model_station_mean_errors = {}
        # plot the prediction run results for each station in parallel
        with Pool(processes=n_parallel) as pool:
            process_results = []

            for station_idx, station in enumerate(stations):
                print('Plotting of prediction run for station %s queued.' % station)
                process_results.append(pool.apply_async(plotPerStationPredictionRunWorker,
                                                        (models, station, train_indices, test_indices,
                                                         station_name_dict, sample_type_color_mapping,
                                                         time_labels, source_path, run)))

            # aggregate results from all processes
            for ps_idx, ps_result in enumerate(process_results):
                # sync processes
                model_station_mean_error = ps_result.get()

                for experiment_title, station_data_list in model_station_mean_error.items():
                    try:
                        model_station_mean_errors[experiment_title] += station_data_list
                    except KeyError:
                        model_station_mean_errors[experiment_title] = station_data_list

                print('[Process %s] Synchronized after plotting station.' % ps_idx)

        run_path = source_path + '/plots/prediction_runs/%s' % run
        os.makedirs(run_path, exist_ok=True)

        generateStationPredictionResultTable(output_path=run_path, results=model_station_mean_errors)
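
Example #1 queues one plotting task per station on a multiprocessing Pool and then merges the per-station mean errors returned by each worker. Below is a minimal, self-contained sketch of that apply_async/get pattern on toy data; _toy_worker, the station names and the result structure are illustrative stand-ins rather than parts of the code above.

from multiprocessing import Pool

def _toy_worker(station):
    # stand-in for plotPerStationPredictionRunWorker: returns per-experiment station results
    return {'experiment_a': [(station, 0.1)]}

if __name__ == '__main__':
    stations = ['ABO', 'BAS', 'BER']
    merged = {}
    with Pool(processes=2) as pool:
        # queue one task per station, mirroring the loop over stations above
        async_results = [pool.apply_async(_toy_worker, (station,)) for station in stations]
        # .get() blocks until the corresponding worker has finished (the synchronisation step)
        for result in async_results:
            for experiment_title, station_data_list in result.get().items():
                merged.setdefault(experiment_title, []).extend(station_data_list)
    print(merged)
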
Example #2
import glob
import os
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np

# Project-specific helpers (DataUtils, sampleTypeBackgroundColoring, generatePredictionResultTable)
# are assumed to be defined in the surrounding module.
def plotAveragedPredictionRun(source_path):
    # gather all models in source folder
    error_data_per_run_dict = {}
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            inits = ds[data_var].init.data
            sample_type_mapping = [mapping[1] for mapping in ds[data_var].init_type_mapping]
            prediction_data = ds[data_var].data
            try:
                error_data_per_run_dict[data_var] += [(model_name, inits, prediction_data, sample_type_mapping)]
            except KeyError:
                error_data_per_run_dict[data_var] = [(model_name, inits, prediction_data, sample_type_mapping)]


    # get the prediction lead time to adjust time labels
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
    time_labels = [str(t)[:-13] for t in times]

    for run, model_error_list in error_data_per_run_dict.items():
        model_mean_errors = {}
        n_subplots = 10
        fig, axes = plt.subplots(n_subplots, figsize=(60, 20), sharey=True)
        for model_idx, model_error_data in enumerate(model_error_list):

            experiment_title, model_inits, prediction_data, init_type_mapping = model_error_data
            N = len(model_inits)
            split_length = N // n_subplots
            ind = np.arange(N)  # the x locations for the groups
            train_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'train']
            test_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'test']
            filtered_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'filterd']


            for i in range(n_subplots):
                # split indexes into slices for each subplot
                index_split = ind[i * split_length:(i + 1) * split_length]

                if model_idx == 0:
                    sampleTypeBackgroundColoring(axes[i], index_split,
                                                 init_type_mapping[i * split_length:(i + 1) * split_length])
                    axes[i].set_xlim([np.min(index_split), np.max(index_split)])

                axes[i].plot(index_split,
                             np.nanmean(prediction_data[i * split_length:(i + 1) * split_length,:, 0],axis=1),
                             label=experiment_title, linewidth=0.15, alpha=0.8)

            train_model_bias = np.nanmean(prediction_data[train_indices][:,:,3])
            train_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[train_indices][:,:,3])))
            train_model_mae = np.nanmean(np.absolute(prediction_data[train_indices][:,:,3]))

            test_model_bias = np.nanmean(prediction_data[test_indices][:,:,3])
            test_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[test_indices][:,:,3])))
            test_model_mae = np.nanmean(np.absolute(prediction_data[test_indices][:,:,3]))
            
            filtered_model_bias = np.nanmean(prediction_data[filtered_indices][:,:,3])
            filtered_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[filtered_indices][:,:,3])))
            filtered_model_mae = np.nanmean(np.absolute(prediction_data[filtered_indices][:,:,3]))


            model_mean_errors[experiment_title] = (train_model_bias, train_model_rmse, train_model_mae,
                                                   test_model_bias, test_model_rmse, test_model_mae,
                                                   filtered_model_bias, filtered_model_rmse, filtered_model_mae)


        # add mean errors of cosmo output predictions
        train_diff_cosmo = prediction_data[train_indices][:,:,1] - prediction_data[train_indices][:,:,2]
        train_cosmo_bias = np.nanmean(train_diff_cosmo)
        train_cosmo_rmse = np.sqrt(np.nanmean(np.square(train_diff_cosmo)))
        train_cosmo_mae = np.nanmean(np.absolute(train_diff_cosmo))

        test_diff_cosmo = prediction_data[test_indices][:,:,1] - prediction_data[test_indices][:,:,2]
        test_cosmo_bias = np.nanmean(test_diff_cosmo)
        test_cosmo_rmse = np.sqrt(np.nanmean(np.square(test_diff_cosmo)))
        test_cosmo_mae = np.nanmean(np.absolute(test_diff_cosmo))
        
        filtered_diff_cosmo = prediction_data[filtered_indices][:,:,1] - prediction_data[filtered_indices][:,:,2]
        filtered_cosmo_bias = np.nanmean(filtered_diff_cosmo)
        filtered_cosmo_rmse = np.sqrt(np.nanmean(np.square(filtered_diff_cosmo)))
        filtered_cosmo_mae = np.nanmean(np.absolute(filtered_diff_cosmo))
        
        # add COSMO-1 output prediction error
        model_mean_errors['COSMO-1'] = (train_cosmo_bias, train_cosmo_rmse, train_cosmo_mae,
                                        test_cosmo_bias, test_cosmo_rmse, test_cosmo_mae,
                                        filtered_cosmo_bias, filtered_cosmo_rmse, filtered_cosmo_mae)

        for i in range(n_subplots):
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length,:, 1], axis=1), label='COSMO-1',
                         linewidth=0.15, alpha=0.8, color='b', linestyle='-.')
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 2], axis=1), label='Prediction',
                         linewidth=0.15, alpha=0.8, color='m', linestyle='--')

            tick_step_size = np.maximum(split_length // 30, 1)
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticklabels(time_labels[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length], minor=True)
            # grid on both major and minor ticks, with lighter minor lines
            axes[i].grid(which='both')
            axes[i].grid(which='minor', alpha=0.2)
            axes[i].grid(which='major', alpha=0.5)

        handles, labels = axes[0].get_legend_handles_labels()

        axes[n_subplots - 1].set_xlabel('Time')
        axes[0].legend(handles, labels)
        plt.tight_layout()

        run_path = source_path + '/plots/prediction_runs/%s' % run
        os.makedirs(run_path, exist_ok=True)

        fig.savefig(run_path + '/averaged_prediction.png', dpi=300)

        generatePredictionResultTable(output_path=run_path, results=model_mean_errors)
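
Example #2 reduces the per-split errors to bias, RMSE and MAE with NaN-aware NumPy reductions, using channel index 3 of the error array for each of the train, test and filtered splits. The following self-contained sketch reproduces just that computation on a toy array; the array shape, the NaN pattern and the split indices are made up for illustration.

import numpy as np

rng = np.random.default_rng(0)
# toy error data shaped (init, station, channel); channel 3 plays the role of the model error
prediction_data = rng.normal(size=(100, 5, 4))
prediction_data[::7, 2, 3] = np.nan  # simulate missing observations
train_indices = np.arange(0, 70)
test_indices = np.arange(70, 100)

def split_errors(data, indices, channel=3):
    # bias, RMSE and MAE over one index split, ignoring NaNs
    errors = data[indices][:, :, channel]
    bias = np.nanmean(errors)
    rmse = np.sqrt(np.nanmean(np.square(errors)))
    mae = np.nanmean(np.absolute(errors))
    return bias, rmse, mae

print('train:', split_errors(prediction_data, train_indices))
print('test:', split_errors(prediction_data, test_indices))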