# imports required by the plotting routines below; DataUtils and the helper
# functions (get_station_dict, plotPerStationPredictionRunWorker, the table
# generators, sampleTypeBackgroundColoring) are assumed to come from the
# surrounding module
import glob
import os
import pickle as pkl
from collections import defaultdict
from multiprocessing import Pool

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr


def plotPerStationPredictionRun(source_path, observation_path, n_parallel):
    # gather the error data of all models in the source folder, keyed by run
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            error_data_per_run_dict[data_var].append((model_name, ds[data_var]))

    # load observations
    OBS = xr.open_dataset(observation_path)

    # get the prediction lead time to adjust the time labels; taken from the
    # last loaded dataset, which is assumed to be representative of all models
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1

    for run, models in error_data_per_run_dict.items():
        stations = models[0][1].station.data
        inits = models[0][1].init.data
        init_type_mapping = np.array(models[0][1].init_type_mapping)
        train_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'train']
        test_indices = [idx for idx, item in enumerate(init_type_mapping) if item[1] == 'test']
        sample_type_color_mapping = [mapping[1] for mapping in init_type_mapping]
        times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
        time_labels = [str(t)[:-13] for t in times]
        station_name_dict = get_station_dict(OBS, stations)

        model_station_mean_errors = {}
        # plot the prediction run results for each station in parallel
        with Pool(processes=n_parallel) as pool:
            process_results = []
            for station in stations:
                print('Plotting of prediction run for station %s queued.' % station)
                process_results.append(pool.apply_async(
                    plotPerStationPredictionRunWorker,
                    (models, station, train_indices, test_indices, station_name_dict,
                     sample_type_color_mapping, time_labels, source_path, run)))

            # aggregate results from all processes
            for ps_idx, ps_result in enumerate(process_results):
                # sync processes
                model_station_mean_error = ps_result.get()
                for experiment_title, station_data_list in model_station_mean_error.items():
                    try:
                        model_station_mean_errors[experiment_title] += station_data_list
                    except KeyError:
                        model_station_mean_errors[experiment_title] = station_data_list
                print('[Process %s] Synchronized after plotting station.' % ps_idx)

        run_path = source_path + '/plots/prediction_runs/%s' % run
        if not os.path.exists(run_path):
            os.makedirs(run_path)

        generateStationPredictionResultTable(output_path=run_path, results=model_station_mean_errors)
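# Illustrative sketch (not part of the original module): a minimal helper to
# inspect one of the pickled 'model_run_error.pkl' datasets gathered above.
# The helper name and the printed fields are assumptions, derived only from
# the attributes the plotting functions actually access.
def _inspectModelRunError(path):
    """Print the runs, dimensions and configured lead times of a pickled error dataset."""
    with open(path, 'rb') as file:
        ds = pkl.load(file)
    for data_var in ds.data_vars:
        da = ds[data_var]
        print('run %s: %d stations, %d inits' % (data_var, da.station.size, da.init.size))
    if 'config' in ds.attrs:
        print('prediction lead times: %s' % ds.attrs['config']['prediction_times'])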
def plotAveragedPredictionRun(source_path):
    # gather the error data of all models in the source folder, keyed by run
    error_data_per_run_dict = defaultdict(list)
    for path in glob.glob(source_path + '/**/model_run_error.pkl', recursive=True):
        model_name = path.split('/')[-2]
        with open(path, 'rb') as file:
            ds = pkl.load(file)
        for data_var in ds.data_vars:
            inits = ds[data_var].init.data
            sample_type_mapping = [mapping[1] for mapping in ds[data_var].init_type_mapping]
            prediction_data = ds[data_var].data
            error_data_per_run_dict[data_var].append(
                (model_name, inits, prediction_data, sample_type_mapping))

    # get the prediction lead time to adjust the time labels; taken from the
    # last loaded dataset, which is assumed to be representative of all models
    prediction_lead_time = ds.attrs['config']['prediction_times'][0] if 'config' in ds.attrs else 1
    times = DataUtils.getTimeFromFileName(inits, prediction_lead_time)
    time_labels = [str(t)[:-13] for t in times]

    for run, model_error_data_list in error_data_per_run_dict.items():
        model_mean_errors = {}
        n_subplots = 10
        fig, axes = plt.subplots(n_subplots, figsize=(60, 20), sharey=True)

        for model_idx, model_error_data in enumerate(model_error_data_list):
            experiment_title = model_error_data[0]
            N = len(model_error_data[1])
            prediction_data = model_error_data[2]
            init_type_mapping = model_error_data[3]
            split_length = N // n_subplots
            ind = np.arange(N)  # the x locations for the groups

            train_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'train']
            test_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'test']
            # note: 'filterd' matches the label spelling stored in init_type_mapping
            filtered_indices = [idx for idx, item in enumerate(init_type_mapping) if item == 'filterd']

            for i in range(n_subplots):
                # split indices into slices for each subplot
                index_split = ind[i * split_length:(i + 1) * split_length]
                if model_idx == 0:
                    sampleTypeBackgroundColoring(axes[i], index_split,
                                                 init_type_mapping[i * split_length:(i + 1) * split_length])
                axes[i].set_xlim([np.min(index_split), np.max(index_split)])
                axes[i].plot(index_split,
                             np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 0], axis=1),
                             label=experiment_title, linewidth=0.15, alpha=0.8)

            # mean error metrics of the model over the error values (data column 3)
            train_model_bias = np.nanmean(prediction_data[train_indices][:, :, 3])
            train_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[train_indices][:, :, 3])))
            train_model_mae = np.nanmean(np.absolute(prediction_data[train_indices][:, :, 3]))

            test_model_bias = np.nanmean(prediction_data[test_indices][:, :, 3])
            test_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[test_indices][:, :, 3])))
            test_model_mae = np.nanmean(np.absolute(prediction_data[test_indices][:, :, 3]))

            filtered_model_bias = np.nanmean(prediction_data[filtered_indices][:, :, 3])
            filtered_model_rmse = np.sqrt(np.nanmean(np.square(prediction_data[filtered_indices][:, :, 3])))
            filtered_model_mae = np.nanmean(np.absolute(prediction_data[filtered_indices][:, :, 3]))

            model_mean_errors[experiment_title] = (train_model_bias, train_model_rmse, train_model_mae,
                                                   test_model_bias, test_model_rmse, test_model_mae,
                                                   filtered_model_bias, filtered_model_rmse, filtered_model_mae)

        # add mean errors of cosmo output predictions (difference of data
        # columns 1 and 2, which are identical across models, so the last
        # model's prediction_data is used)
        train_diff_cosmo = prediction_data[train_indices][:, :, 1] - prediction_data[train_indices][:, :, 2]
        train_cosmo_bias = np.nanmean(train_diff_cosmo)
        train_cosmo_rmse = np.sqrt(np.nanmean(np.square(train_diff_cosmo)))
        train_cosmo_mae = np.nanmean(np.absolute(train_diff_cosmo))

        test_diff_cosmo = prediction_data[test_indices][:, :, 1] - prediction_data[test_indices][:, :, 2]
        test_cosmo_bias = np.nanmean(test_diff_cosmo)
        test_cosmo_rmse = np.sqrt(np.nanmean(np.square(test_diff_cosmo)))
        test_cosmo_mae = np.nanmean(np.absolute(test_diff_cosmo))

        filtered_diff_cosmo = prediction_data[filtered_indices][:, :, 1] - prediction_data[filtered_indices][:, :, 2]
        filtered_cosmo_bias = np.nanmean(filtered_diff_cosmo)
        filtered_cosmo_rmse = np.sqrt(np.nanmean(np.square(filtered_diff_cosmo)))
        filtered_cosmo_mae = np.nanmean(np.absolute(filtered_diff_cosmo))

        # add COSMO-1 output prediction error
        model_mean_errors['COSMO-1'] = (train_cosmo_bias, train_cosmo_rmse, train_cosmo_mae,
                                        test_cosmo_bias, test_cosmo_rmse, test_cosmo_mae,
                                        filtered_cosmo_bias, filtered_cosmo_rmse, filtered_cosmo_mae)

        for i in range(n_subplots):
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 1], axis=1),
                         label='COSMO-1', linewidth=0.15, alpha=0.8, color='b', linestyle='-.')
            axes[i].plot(ind[i * split_length:(i + 1) * split_length],
                         np.nanmean(prediction_data[i * split_length:(i + 1) * split_length, :, 2], axis=1),
                         label='Prediction', linewidth=0.15, alpha=0.8, color='m', linestyle='--')

            tick_step_size = np.maximum(split_length // 30, 1)
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticklabels(time_labels[i * split_length:(i + 1) * split_length][::tick_step_size])
            axes[i].set_xticks(ind[i * split_length:(i + 1) * split_length], minor=True)

            # and a corresponding grid, with different settings for minor and major lines
            axes[i].grid(which='both')
            axes[i].grid(which='minor', alpha=0.2)
            axes[i].grid(which='major', alpha=0.5)

        handles, labels = axes[0].get_legend_handles_labels()
        axes[n_subplots - 1].set_xlabel('Time')
        axes[0].legend(handles, labels)
        plt.tight_layout()

        run_path = source_path + '/plots/prediction_runs/%s' % run
        if not os.path.exists(run_path):
            os.makedirs(run_path)

        fig.savefig(run_path + '/averaged_prediction.png', dpi=300)
        generatePredictionResultTable(output_path=run_path, results=model_mean_errors)
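# Usage sketch, not part of the original module. Assumptions: the experiment
# output lives under 'output/', the observation dataset is 'data/observations.nc',
# and 8 worker processes are available; adjust paths and parallelism as needed.
if __name__ == '__main__':
    plotAveragedPredictionRun(source_path='output')
    plotPerStationPredictionRun(source_path='output',
                                observation_path='data/observations.nc',
                                n_parallel=8)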