# stdlib
import os
from os.path import join

# third-party
# FIX: `os` and `numpy` are used below (file listing, nan-aware statistics)
# but were never imported explicitly — they presumably leaked in through the
# star import of constants.AI_params. Import them explicitly.
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import plot_model

# project
import AI_proj.trainingutils as utilsNN
from constants.AI_params import *
from inout.io_common import create_folder, select_cases_from_folder
from models.modelSelector import select_2d_model

# Enable TensorFlow automatic mixed precision at import time (module-level
# side effect preserved from the original file).
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

if __name__ == '__main__':
    # Read the 2D-training configuration and unpack every parameter the
    # training pipeline needs into module-level names.
    config = get_training_2d()
    input_folder = config[TrainingParams.input_folder]
    output_folder = config[TrainingParams.output_folder]
    val_perc = config[TrainingParams.validation_percentage]
    test_perc = config[TrainingParams.test_percentage]
    eval_metrics = config[TrainingParams.evaluation_metrics]
    loss_func = config[TrainingParams.loss_function]
    batch_size = config[TrainingParams.batch_size]
    epochs = config[TrainingParams.epochs]
    img_names = config[TrainingParams.image_file_names]
    model_name_user = config[TrainingParams.config_name]
    ctr_names = config[TrainingParams.ctr_file_names]
    optimizer = config[TrainingParams.optimizer]
    nn_input_size = config[ModelParams.INPUT_SIZE]
def ComputeOverallMinMaxVar():
    """
    Computes the min, max, mean and variance for all the fields in the
    preprocessed data-assimilation files (model, observations and increment)
    and writes a summary table to 'MIN_MAX_MEAN_STD.csv'.

    Two passes are made over the files: the first accumulates min/max and the
    running mean; the second accumulates the variance around those means.
    :return: None
    """
    config = get_training_2d()
    input_folder = config[ProjTrainingParams.input_folder_preproc]
    fields = config[ProjTrainingParams.fields_names]
    fields_obs = config[ProjTrainingParams.fields_names_obs]

    # FIX: the original initialized max to 0 and min to 10**5, which yields
    # wrong extremes for all-negative fields (typical for increments) or for
    # values above 1e5. With +/- infinity the first real sample always wins.
    max_values_model = {field: -np.inf for field in fields}
    min_values_model = {field: np.inf for field in fields}
    max_values_obs = {field: -np.inf for field in fields_obs}
    min_values_obs = {field: np.inf for field in fields_obs}
    max_values_inc = {field: -np.inf for field in fields}
    min_values_inc = {field: np.inf for field in fields}
    mean_values_model = {field: 0 for field in fields}
    mean_values_obs = {field: 0 for field in fields_obs}
    mean_values_inc = {field: 0 for field in fields}
    var_values_model = {field: 0 for field in fields}
    var_values_obs = {field: 0 for field in fields_obs}
    var_values_inc = {field: 0 for field in fields}

    def _day_files(c_file):
        """Parse year/day-of-year from a model file name and return the
        (model, increment, obs) netCDF paths for that day."""
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])
        return (join(input_folder, F'model_{year}_{day_of_year:03d}.nc'),
                join(input_folder, F'increment_{year}_{day_of_year:03d}.nc'),
                join(input_folder, F'obs_{year}_{day_of_year:03d}.nc'))

    # These are the data assimilated files
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])
    model_files.sort()
    tot_files = len(model_files)

    z_layers = [0]  # only the surface layer is analyzed

    # ==================== Pass 1: min / max / mean ====================
    for id_file, c_file in enumerate(model_files):
        print(F"Working with {c_file}")
        model_file, inc_file, obs_file = _day_files(c_file)
        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        # =============== Min/max/mean for the model fields
        for c_field_name in fields:
            da_np_c_field = input_fields_model[c_field_name]
            # Running mean: each file contributes 1/tot_files of its nan-mean
            mean_values_model[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_model[c_field_name]:
                max_values_model[c_field_name] = c_max
            if c_min <= min_values_model[c_field_name]:
                min_values_model[c_field_name] = c_min

        # =============== Min/max/mean for the observations
        for c_field_name in fields_obs:
            da_np_c_field = input_fields_obs[c_field_name]
            # Some days have no observations at all (e.g. day 245); nanmean then
            # fails, so best-effort: that file contributes 0 to the mean.
            try:
                mean_values_obs[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            except Exception as e:
                mean_values_obs[c_field_name] += 0
            print(F' {c_file}:{c_field_name}: {mean_values_obs[c_field_name]}')
            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_obs[c_field_name]:
                max_values_obs[c_field_name] = c_max
            if c_min <= min_values_obs[c_field_name]:
                min_values_obs[c_field_name] = c_min

        # =============== Min/max/mean for the increment
        for c_field_name in fields:
            da_np_c_field = output_field_increment[c_field_name]
            mean_values_inc[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            c_max = np.nanmax(da_np_c_field)
            c_min = np.nanmin(da_np_c_field)
            if c_max >= max_values_inc[c_field_name]:
                max_values_inc[c_field_name] = c_max
            if c_min <= min_values_inc[c_field_name]:
                min_values_inc[c_field_name] = c_min

    # ==================== Pass 2: variance (needs final means) ====================
    print("=============================== Computing Variance....")
    for id_file, c_file in enumerate(model_files):
        print(F"Working with {c_file}")
        model_file, inc_file, obs_file = _day_files(c_file)
        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        for c_field_name in fields:
            da_np_c_field = input_fields_model[c_field_name]
            var_values_model[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_model[c_field_name]) ** 2) / tot_files

        for c_field_name in fields_obs:
            da_np_c_field = input_fields_obs[c_field_name]
            # Observations look like masked arrays (`.filled`) — fill the mask
            # with NaN so an all-missing day can be detected and skipped.
            data = (da_np_c_field[:].filled(np.nan) - mean_values_obs[c_field_name]) ** 2
            if np.logical_not(np.isnan(data)).any():
                var_values_obs[c_field_name] += np.nanmean(data) / tot_files

        for c_field_name in fields:
            da_np_c_field = output_field_increment[c_field_name]
            var_values_inc[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_inc[c_field_name]) ** 2) / tot_files

    # ==================== Report: write CSV and echo to stdout ====================
    print("----------------- Model --------------------")
    # FIX: context manager so the CSV is flushed/closed even if a write fails
    # (the original used a bare open()/close() pair).
    with open("MIN_MAX_MEAN_STD.csv", 'w') as f:
        f.write(F"TYPE,Field,MIN,MAX,MEAN,VARIANCE,STD\n")
        for c_field_name in fields:
            text = F"MODEL,{c_field_name}, {min_values_model[c_field_name]:0.6f}, {max_values_model[c_field_name]:0.6f}, " \
                   F" {mean_values_model[c_field_name]:0.6f}, {var_values_model[c_field_name]: 0.6f}, {np.sqrt(var_values_model[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)
        print("----------------- Observations --------------------")
        for c_field_name in fields_obs:
            text = F"OBS,{c_field_name}, {min_values_obs[c_field_name]:0.6f}, {max_values_obs[c_field_name]:0.6f}, " \
                   F" {mean_values_obs[c_field_name]:0.6f}, {var_values_obs[c_field_name]: 0.6f}, {np.sqrt(var_values_obs[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)
        print("----------------- Increment --------------------")
        for c_field_name in fields:
            text = F"INC,{c_field_name}, {min_values_inc[c_field_name]:0.6f}, {max_values_inc[c_field_name]:0.6f}," \
                   F" {mean_values_inc[c_field_name]:0.6f}, {var_values_inc[c_field_name]: 0.6f}, {np.sqrt(var_values_inc[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)