Example #1
import glob
import os
import pickle as pkl
from collections import defaultdict
from multiprocessing import Pool

import numpy as np
import xarray as xr

# Project-specific helpers (DataUtils, get_station_dict, plotPerStationPredictionRunWorker,
# generateStationPredictionResultTable) are assumed to be defined in the surrounding module.
def plotPerStationPredictionRun(source_path, observation_path, n_parallel):
    # gather all models in source folder
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            da = ds[data_var]
            error_data_per_run_dict[data_var].append((model_name, da))

    # load observations
    OBS = xr.open_dataset(observation_path)
    # get the prediction lead time to adjust time labels
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    for run, models in error_data_per_run_dict.items():
        stations = models[0][1].station.data
        inits = models[0][1].init.data
        init_type_mapping = np.array(models[0][1].init_type_mapping)
        train_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'train']
        test_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'test']
        sample_type_color_mapping = [mapping[1] for mapping in init_type_mapping]
        times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
        time_labels = [str(t)[:-13] for t in times]

        station_name_dict = get_station_dict(OBS, stations)

        model_station_mean_errors = {}
        # plot the prediction run results for each station in parallel
        with Pool(processes=n_parallel) as pool:
            process_results = []

            for station_idx, station in enumerate(stations):
                print('Plotting of prediction run for station %s queued.' % station)
                process_results.append(pool.apply_async(plotPerStationPredictionRunWorker,
                                                        (models, station, train_indices, test_indices,
                                                         station_name_dict, sample_type_color_mapping,
                                                         time_labels, source_path, run)))

            # aggregate results from all processes
            for ps_idx, ps_result in enumerate(process_results):
                # sync processes
                model_station_mean_error = ps_result.get()

                for experiment_title, station_data_list in model_station_mean_error.items():
                    try:
                        model_station_mean_errors[experiment_title] += station_data_list
                    except KeyError:
                        model_station_mean_errors[experiment_title] = station_data_list

                print('[Process %s] Synchronized after plotting station.' % ps_idx)

        run_path = source_path + '/plots/prediction_runs/%s' % run
        os.makedirs(run_path, exist_ok=True)

        generateStationPredictionResultTable(output_path=run_path, results=model_station_mean_errors)
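
Example #1 queues one plotting task per station on a multiprocessing Pool and then merges the per-station mean errors returned by each worker. Below is a minimal, self-contained sketch of that apply_async/get pattern on toy data; _toy_worker, the station names and the result structure are illustrative stand-ins rather than parts of the code above.

from multiprocessing import Pool

def _toy_worker(station):
    # stand-in for plotPerStationPredictionRunWorker: returns per-experiment station results
    return {'experiment_a': [(station, 0.1)]}

if __name__ == '__main__':
    stations = ['ABO', 'BAS', 'BER']
    merged = {}
    with Pool(processes=2) as pool:
        # queue one task per station, mirroring the loop over stations above
        async_results = [pool.apply_async(_toy_worker, (station,)) for station in stations]
        # .get() blocks until the corresponding worker has finished (the synchronisation step)
        for result in async_results:
            for experiment_title, station_data_list in result.get().items():
                merged.setdefault(experiment_title, []).extend(station_data_list)
    print(merged)
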
Example #2
import glob
import os
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np

# Project-specific helpers (DataUtils, sampleTypeBackgroundColoring, generatePredictionResultTable)
# are assumed to be defined in the surrounding module.
def plotAveragedPredictionRun(source_path):
    # gather all models in source folder
    error_data_per_run_dict = {}
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            inits = ds[data_var].init.data
            sample_type_mapping = [mapping[1] for mapping in ds[data_var].init_type_mapping]
            prediction_data = ds[data_var].data
            try:
                error_data_per_run_dict[data_var] += [(model_name, inits, prediction_data, sample_type_mapping)]
            except KeyError:
                error_data_per_run_dict[data_var] = [(model_name, inits, prediction_data, sample_type_mapping)]


    # get the prediction lead time to adjust time labels
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
    time_labels = [str(t)[:-13] for t in times]

    for run, model_error_list in error_data_per_run_dict.items():
        model_mean_errors = {}
        n_subplots = 10
        fig, axes = plt.subplots(n_subplots, figsize=(60, 20), sharey=True)
        for model_idx, model_error_data in enumerate(model_error_list):

            experiment_title, model_inits, prediction_data, init_type_mapping = model_error_data
            N = len(model_inits)
            split_length = N // n_subplots
            ind = np.arange(N)  # the x locations for the groups
            train_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'train']
            test_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'test']
            filtered_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'filterd']


            for i in range(n_subplots):
                # split indexes into slices for each subplot
                index_split = ind[i * split_length:(i + 1) * split_length]

                if model_idx == 0:
                    sampleTypeBackgroundColoring(axes[i], index_split,
                                                 init_type_mapping[i * split_length:(i + 1) * split_length])
                    axes[i].set_xlim([np.min(index_split), np.max(index_split)])

                axes[i].plot(index_split,
                             np.nanmean(prediction_data[i * split_length:(i + 1) * split_length,:, 0],axis=1),
                             label=experiment_title, linewidth=0.15, alpha=0.8)

            train_model_bias = np.nanmean(prediction_data[train_indices][:,:,3])
            train_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[train_indices][:,:,3])))
            train_model_mae = np.nanmean(np.absolute(prediction_data[train_indices][:,:,3]))

            test_model_bias = np.nanmean(prediction_data[test_indices][:,:,3])
            test_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[test_indices][:,:,3])))
            test_model_mae = np.nanmean(np.absolute(prediction_data[test_indices][:,:,3]))
            
            filtered_model_bias = np.nanmean(prediction_data[filtered_indices][:,:,3])
            filtered_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[filtered_indices][:,:,3])))
            filtered_model_mae = np.nanmean(np.absolute(prediction_data[filtered_indices][:,:,3]))


            model_mean_errors[experiment_title] = (train_model_bias, train_model_rmse, train_model_mae,
                                                   test_model_bias, test_model_rmse, test_model_mae,
                                                   filtered_model_bias, filtered_model_rmse, filtered_model_mae)


        # add mean errors of cosmo output predictions
        train_diff_cosmo = prediction_data[train_indices][:,:,1] - prediction_data[train_indices][:,:,2]
        train_cosmo_bias = np.nanmean(train_diff_cosmo)
        train_cosmo_rmse = np.sqrt(np.nanmean(np.square(train_diff_cosmo)))
        train_cosmo_mae = np.nanmean(np.absolute(train_diff_cosmo))

        test_diff_cosmo = prediction_data[test_indices][:,:,1] - prediction_data[test_indices][:,:,2]
        test_cosmo_bias = np.nanmean(test_diff_cosmo)
        test_cosmo_rmse = np.sqrt(np.nanmean(np.square(test_diff_cosmo)))
        test_cosmo_mae = np.nanmean(np.absolute(test_diff_cosmo))
        
        filtered_diff_cosmo = prediction_data[filtered_indices][:,:,1] - prediction_data[filtered_indices][:,:,2]
        filtered_cosmo_bias = np.nanmean(filtered_diff_cosmo)
        filtered_cosmo_rmse = np.sqrt(np.nanmean(np.square(filtered_diff_cosmo)))
        filtered_cosmo_mae = np.nanmean(np.absolute(filtered_diff_cosmo))
        
        # add COSMO-1 output prediction error
        model_mean_errors['COSMO-1'] = (train_cosmo_bias, train_cosmo_rmse, train_cosmo_mae,
                                        test_cosmo_bias, test_cosmo_rmse, test_cosmo_mae,
                                        filtered_cosmo_bias, filtered_cosmo_rmse, filtered_cosmo_mae)

        for i in range(n_subplots):
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length,:, 1], axis=1), label='COSMO-1',
                         linewidth=0.15, alpha=0.8, color='b', linestyle='-.')
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 2], axis=1), label='Prediction',
                         linewidth=0.15, alpha=0.8, color='m', linestyle='--')

            tick_step_size = np.maximum(split_length // 30, 1)
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticklabels(time_labels[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length], minor=True)
            # grid on both major and minor ticks, with lighter minor lines
            axes[i].grid(which='both')
            axes[i].grid(which='minor', alpha=0.2)
            axes[i].grid(which='major', alpha=0.5)

        handles, labels = axes[0].get_legend_handles_labels()

        axes[n_subplots - 1].set_xlabel('Time')
        axes[0].legend(handles, labels)
        plt.tight_layout()

        run_path = source_path + '/plots/prediction_runs/%s' % run
        os.makedirs(run_path, exist_ok=True)

        fig.savefig(run_path + '/averaged_prediction.png', dpi=300)

        generatePredictionResultTable(output_path=run_path, results=model_mean_errors)
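
Example #2 reduces the per-split errors to bias, RMSE and MAE with NaN-aware NumPy reductions, using channel index 3 of the error array for each of the train, test and filtered splits. The following self-contained sketch reproduces just that computation on a toy array; the array shape, the NaN pattern and the split indices are made up for illustration.

import numpy as np

rng = np.random.default_rng(0)
# toy error data shaped (init, station, channel); channel 3 plays the role of the model error
prediction_data = rng.normal(size=(100, 5, 4))
prediction_data[::7, 2, 3] = np.nan  # simulate missing observations
train_indices = np.arange(0, 70)
test_indices = np.arange(70, 100)

def split_errors(data, indices, channel=3):
    # bias, RMSE and MAE over one index split, ignoring NaNs
    errors = data[indices][:, :, channel]
    bias = np.nanmean(errors)
    rmse = np.sqrt(np.nanmean(np.square(errors)))
    mae = np.nanmean(np.absolute(errors))
    return bias, rmse, mae

print('train:', split_errors(prediction_data, train_indices))
print('test:', split_errors(prediction_data, test_indices))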