def visualizeBackgroundIncrementAnalaysis():
    """Save one PNG per preprocessed example showing background, observation and increment.

    The files inside the hard-coded ``preproc`` folder are grouped by the
    substrings ``model``, ``obs`` and ``increment`` in their names.  Each group
    is sorted so that position ``i`` is expected to refer to the same date in
    the three lists.  For every model file the observation field is overlaid on
    a copy of the model field (only where the observation is not masked) and
    the three maps are drawn with ``plotMaps1`` and written to the output
    folder as ``<i>.png``.

    :return: None. The only effect is the set of images written to disk.
    """
    input_folder = '/data/HYCOM/DA_HYCOM_TSIS/preproc'
    output_folder = '/data/HYCOM/DA_HYCOM_TSIS/SUMMARY/AssimilatedData'

    all_files = os.listdir(input_folder)
    # Sort all of them or they won't match
    model_files = sorted(join(input_folder, x) for x in all_files if 'model' in x)
    obs_files = sorted(join(input_folder, x) for x in all_files if 'obs' in x)
    inc_files = sorted(join(input_folder, x) for x in all_files if 'increment' in x)

    mvar = "temp"  # "srfhgt"
    ovar = "sst"  # "ssh"

    # The coordinates are read from the standard deviation file
    var_file = "/data/HYCOM/DA_HYCOM_TSIS/preproc/cov_mat/tops_ias_std.nc"
    data_lat_lon = read_netcdf(var_file, ['xc', 'yc'])
    lat = data_lat_lon['yc'][:]
    lon = data_lat_lon['xc'][:]

    for i, model_file in enumerate(model_files):
        model_data = read_netcdf(model_file, [mvar])
        obs_data_m = read_netcdf(obs_files[i], [ovar])
        inc_data = read_netcdf(inc_files[i], [mvar])

        # Start from the model field and replace it with the observation
        # wherever the observation is not masked.
        obs_data = model_data[mvar][:].copy()
        no_mask_at = np.logical_not(obs_data_m[ovar][:].mask)
        obs_data[no_mask_at] = obs_data_m[ovar][:][no_mask_at]

        data = np.array([model_data[mvar][:], obs_data, inc_data[mvar][:]])
        plotMaps1(data, ["Background", "Observation", "Increment"], "Title", lat, lon)
        plt.savefig(join(output_folder, F"{i:004d}.png"), bbox_inches='tight')
        # Release the figure that was just saved; otherwise one figure per
        # file stays open for the whole run.
        plt.close()
def ComputeMinMaxSTDFields(file_name, fields_names, output_file):
    """Write a CSV with the min, max, mean and variance of the requested fields.

    :param file_name: NetCDF file handed to ``read_netcdf``.
    :param fields_names: names of the fields to summarize. 2D fields are used
        as they are; for 3D fields only index 0 of the first axis is used.
    :param output_file: path of the CSV file to write.
    :raises ValueError: if a field is neither 2D nor 3D. Previously such a
        field silently reused the data of the previous field (or failed with
        an unbound variable when it was the first one).
    :return: None
    """
    data = read_netcdf(file_name, [], [0])

    out_fields = []
    out_mins = []
    out_maxs = []
    out_vars = []
    out_means = []

    for field_name in fields_names:
        n_dims = len(data[field_name].shape)
        if n_dims == 2:
            field = data[field_name][:]
        elif n_dims == 3:
            field = data[field_name][0, :]
        else:
            raise ValueError(
                F"Field '{field_name}' has {n_dims} dimensions; only 2D and 3D fields are supported")

        out_fields.append(field_name)
        out_mins.append(np.amin(field))
        out_maxs.append(np.amax(field))
        out_means.append(np.mean(field))
        out_vars.append(np.var(field))

    out_dic = {
        "Name": ["STD"] * len(out_fields),
        "Field": out_fields,
        "MIN": out_mins,
        "MAX": out_maxs,
        "MEAN": out_means,
        "VAR": out_vars,
    }
    df = pd.DataFrame.from_dict(out_dic)
    df.to_csv(output_file)
def data_gen_from_preproc(input_folder_preproc, config, ids, field_names, obs_field_names, output_fields, z_layers=None):
    """
    Endless generator of (X, Y) training examples built from preprocessed files.

    The folder is scanned for files whose names start with ``obs``,
    ``increment`` and ``model``; each group is sorted and ``ids`` indexes the
    three sorted lists at the same time. For every selected id the three files
    are read and ``batch_size`` random crops of ``rows`` x ``cols`` are yielded,
    one example per ``yield`` (the leading batch dimension always has size 1).

    :param input_folder_preproc: folder with the preprocessed files and the
        ``cov_mat/tops_ias_std.nc`` file.
    :param config: configuration dictionary (batch size, crop size,
        normalization type, variance fields and model type are read from it).
    :param ids: indexes of the examples to use. NOTE: it is shuffled in place.
    :param field_names: model fields used as input.
    :param obs_field_names: observation fields used as input.
    :param output_fields: increment fields used as target.
    :param z_layers: layers passed to ``read_netcdf``; defaults to ``[0]``.
    :return: yields ``(X, Y)``; for the multistream model ``X`` is a list with
        one array per input stream.
    """
    if z_layers is None:
        z_layers = [0]

    ex_id = -1
    np.random.shuffle(ids)
    batch_size = config[TrainingParams.batch_size]

    all_files = os.listdir(input_folder_preproc)
    obs_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('obs')])
    increment_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('increment')])
    model_files = np.array([join(input_folder_preproc, x) for x in all_files if x.startswith('model')])
    var_file = join(input_folder_preproc, "cov_mat", "tops_ias_std.nc")

    # Sorted so that the same index refers to the same date in the three lists
    obs_files.sort()
    increment_files.sort()
    model_files.sort()

    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    norm_type = config[ProjTrainingParams.norm_type]

    # Read the variance of the selected fields (only once, it is shared by all the examples)
    var_field_names = config[ProjTrainingParams.fields_names_var]
    if len(var_field_names) > 0:
        input_fields_var = read_netcdf(var_file, var_field_names, z_layers)
    else:
        input_fields_var = []

    perc_ocean = 0.99
    is_multistream = config[ModelParams.MODEL] == AiModels.UNET_2D_MULTISTREAMS

    while True:
        # These lines are for sequential selection
        if ex_id < (len(ids) - 1):  # We are not supporting batch processing right now
            ex_id += 1
        else:
            ex_id = 0
            np.random.shuffle(ids)  # We shuffle the ids every time we have used all the examples

        c_id = ids[ex_id]
        # Defined before the try block so the error message below can always
        # be formatted, even when the failure happens before the first crop.
        succ_attempts = 0
        try:
            output_file_name = increment_files[c_id]
            obs_file_name = obs_files[c_id]
            model_file_name = model_files[c_id]

            # Needs to validate that all the files are from the same date
            model_file_year, model_file_day = get_date_from_preproc_filename(model_file_name)
            obs_file_year, obs_file_day = get_date_from_preproc_filename(obs_file_name)
            output_file_year, output_file_day = get_date_from_preproc_filename(output_file_name)
            if (model_file_day != obs_file_day) or (model_file_day != output_file_day) or \
                    (model_file_year != obs_file_year) or (model_file_year != output_file_year):
                print(F"The year and day do not correspond between the files: {output_file_name}, {model_file_name}, {obs_file_name}")
                # SystemExit is not an Exception, so it is not caught by the handler below
                exit()

            # If any file doesn't exist, jump to the next example
            if not exists(output_file_name):
                print(F"File doesn't exist: {output_file_name}")
                continue

            # *********************** Reading files **************************
            input_fields_model = read_netcdf(model_file_name, field_names, z_layers)
            input_fields_obs = read_netcdf(obs_file_name, obs_field_names, z_layers)
            output_field_increment = read_netcdf(output_file_name, output_fields, z_layers)

            while succ_attempts < batch_size:
                # These hardcoded numbers come from the specific size of these files
                start_row = np.random.randint(0, 891 - rows)
                start_col = np.random.randint(0, 1401 - cols)

                try:
                    if is_multistream:
                        input_data, y_data = generateXandYMulti(input_fields_model, input_fields_obs, input_fields_var, output_field_increment,
                                                                field_names, obs_field_names, var_field_names, output_fields,
                                                                start_row, start_col, rows, cols, norm_type=norm_type)
                    else:
                        input_data, y_data = generateXandY(input_fields_model, input_fields_obs, input_fields_var, output_field_increment,
                                                           field_names, obs_field_names, var_field_names, output_fields,
                                                           start_row, start_col, rows, cols, norm_type=norm_type, perc_ocean=perc_ocean)
                except Exception:
                    # The crop was rejected by the helper; silently try another random position
                    continue

                succ_attempts += 1

                # NaN values are replaced by 0 on the inputs and by -0.5 on the target.
                # Trying a new loss function that does not take the land into account.
                input_data = np.nan_to_num(input_data, nan=0)
                y_data = np.nan_to_num(y_data, nan=-0.5)

                if is_multistream:
                    X = [np.expand_dims(x, axis=0) for x in input_data]
                else:
                    X = np.expand_dims(input_data, axis=0)
                Y = np.expand_dims(y_data, axis=0)

                yield X, Y
        except Exception as e:
            print(F"----- Not able to generate for file number (from batch): {succ_attempts} ERROR: ", str(e))
def test_model(config):
    """
    Run a trained network tile by tile over the preprocessed files and plot the results.

    For each selected model file the matching increment and observation files
    are read, the 891 x 1401 domain is traversed in tiles of ``rows`` x
    ``cols`` (the next start position is given by ``verifyBoundaries``), and
    for every tile accepted by ``generateXandY`` the network prediction is
    denormalized, compared against the increment (RMSE over the cells that are
    not NaN in the target) and plotted. The tiles are also assembled into a
    whole-domain prediction that is plotted at the end of each file.

    :param config: configuration dictionary with the prediction and training
        parameters (folders, field names, tile size, network type, weights).
    :raises ValueError: if the configured network type is not one of the
        supported ones.
    :return: None. The results are the images written to the output folders.
    """
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')
    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        # Fail here with a clear message instead of an unbound 'model' below
        raise ValueError(F"Unsupported network type: {net_type}")

    plot_model(model, to_file=join(output_folder, F'running.png'), show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    # Size of the whole domain in the preprocessed files
    tot_rows = 891
    tot_cols = 1401

    # NOTE(review): these summaries are accumulated but never returned or saved
    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []

    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        # NOTE(review): only day 5 of each year is processed, which also makes
        # the 'day_of_year == 353' condition below unreachable. Looks like a
        # leftover debugging filter -- confirm.
        if day_of_year != 5:
            continue

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model, z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        whole_cnn = np.zeros((tot_rows, tot_cols))
        whole_y = np.zeros((tot_rows, tot_cols))

        this_file_times = []

        start_row = 0
        donerow = False
        while not donerow:
            donecol = False
            start_col = 0
            while not donecol:
                # Generate the proper inputs for the NN
                try:
                    perc_ocean = .05
                    input_data, y_data = generateXandY(input_fields_model, input_fields_obs, input_fields_std, output_field_increment,
                                                       field_names_model, field_names_obs, field_names_std, output_fields,
                                                       start_row, start_col, rows, cols, norm_type=norm_type, perc_ocean=perc_ocean)
                except Exception:
                    # The tile was rejected by the helper: move to the next column
                    print(F"Land for {c_file} row:{start_row} col:{start_col}")
                    start_col, donecol = verifyBoundaries(start_col, cols, tot_cols)
                    continue

                # ******************* Replacing nan values *********
                # NaN values are replaced by 0 on the inputs and by -0.5 on the target
                input_data_nans = np.isnan(input_data)
                input_data = np.nan_to_num(input_data, nan=0)
                y_data = np.nan_to_num(y_data, nan=-0.5)

                X = np.expand_dims(input_data, axis=0)
                Y = np.expand_dims(y_data, axis=0)

                # Make the prediction of the network
                start = time.time()
                output_nn_original = model.predict(X, verbose=1)
                toc = time.time() - start
                this_file_times.append(toc)

                # Make nan all the values that were replaced by -0.5 in the target
                land_indexes = Y == -0.5
                output_nn_original[land_indexes] = np.nan

                # ==== Denormalizing all inputs and outputs to the proper units of each field
                denorm_cnn_output = denormalizeData(output_nn_original, output_fields, PreprocParams.type_inc, norm_type)
                denorm_y = denormalizeData(Y, output_fields, PreprocParams.type_inc, norm_type)
                input_types = [PreprocParams.type_model for _ in input_fields_model] + \
                              [PreprocParams.type_obs for _ in input_fields_obs] + \
                              [PreprocParams.type_std for _ in input_fields_std]
                denorm_input = denormalizeData(input_data, field_names_model + field_names_obs + field_names_std, input_types, norm_type)

                # Recover the original land areas, they are lost after denormalization
                denorm_input[input_data_nans] = np.nan
                denorm_y[land_indexes] = np.nan

                # Remove the 'extra dimension'
                denorm_cnn_output = np.squeeze(denorm_cnn_output)
                denorm_y = np.squeeze(denorm_y)
                # Add the tile to the 'whole prediction'
                whole_cnn[start_row:start_row + rows, start_col:start_col + cols] = denorm_cnn_output
                whole_y[start_row:start_row + rows, start_col:start_col + cols] = denorm_y

                # Every tile is plotted (this used to be done only for a random subset)
                if len(denorm_cnn_output.shape) == 2:
                    # In this case we only had one output and we need to make it 'array' to plot
                    denorm_cnn_output = np.expand_dims(denorm_cnn_output, axis=2)
                    denorm_y = np.expand_dims(denorm_y, axis=2)

                # Compute RMSE, per output field, only where the target is not NaN
                rmse_cnn = np.zeros(len(output_fields))
                for i in range(len(output_fields)):
                    ocean_indexes = np.logical_not(np.isnan(denorm_y[:, :, i]))
                    rmse_cnn[i] = np.sqrt(mean_squared_error(denorm_cnn_output[:, :, i][ocean_indexes],
                                                             denorm_y[:, :, i][ocean_indexes]))

                viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False)

                # ================== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ===================
                viz_obj.plot_2d_data_np_raw(
                    np.concatenate((denorm_input.swapaxes(0, 2), denorm_y.swapaxes(0, 2), denorm_cnn_output.swapaxes(0, 2))),
                    var_names=[F"in_model_{x}" for x in field_names_model] +
                              [F"in_obs_{x}" for x in field_names_obs] +
                              [F"in_var_{x}" for x in field_names_std] +
                              [F"out_inc_{x}" for x in output_fields] +
                              [F"cnn_{x}" for x in output_fields],
                    file_name=F"Input_and_CNN_{c_file}_{start_row:03d}_{start_col:03d}",
                    cmap=cmap_model + cmap_obs + cmap_std + cmap_out + cmap_out,
                    rot_90=True,
                    cols_per_row=len(field_names_model),
                    title=F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}")

                # =========== Making the same color bar for desired output and the NN =====================
                mincbar = [np.nanmin(denorm_y[:, :, x]) for x in range(denorm_cnn_output.shape[-1])]
                maxcbar = [np.nanmax(denorm_y[:, :, x]) for x in range(denorm_cnn_output.shape[-1])]
                error = (denorm_y - denorm_cnn_output).swapaxes(0, 2)
                mincbarerror = [np.nanmin(error[i, :, :]) for i in range(len(output_fields))]
                maxcbarerror = [np.nanmax(error[i, :, :]) for i in range(len(output_fields))]
                viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False,
                                             mincbar=mincbar + mincbar + mincbarerror,
                                             maxcbar=maxcbar + maxcbar + maxcbarerror)

                # ================== Displays CNN and TSIS with RMSE ================
                viz_obj.output_folder = join(output_imgs_folder, 'JoinedErrrorCNN')
                cmap = chooseCMAP(output_fields)
                error_cmap = cmocean.cm.diff
                viz_obj.plot_2d_data_np_raw(
                    np.concatenate((denorm_cnn_output.swapaxes(0, 2), denorm_y.swapaxes(0, 2), error), axis=0),
                    var_names=[F"CNN INC {x}" for x in output_fields] +
                              [F"TSIS INC {x}" for x in output_fields] +
                              [F'RMSE {c_rmse_cnn:0.4f}' for c_rmse_cnn in rmse_cnn],
                    file_name=F"AllError_{c_file}_{start_row:03d}_{start_col:03d}",
                    rot_90=True,
                    cmap=cmap + cmap + [error_cmap],
                    cols_per_row=len(output_fields),
                    title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}")

                start_col, donecol = verifyBoundaries(start_col, cols, tot_cols)
                # Column for
            start_row, donerow = verifyBoundaries(start_row, rows, tot_rows)
            # Row for

        # ======= Plots whole output with RMSE
        whole_mincbar = np.nanmin(whole_y) / 2
        whole_maxcbar = np.nanmax(whole_y) / 2
        error = whole_y - whole_cnn
        whole_mincbarerror = np.nanmin(error) / 2
        whole_maxcbarerror = np.nanmax(error) / 2
        # NOTE(review): NaN cells are counted as non-zero here while nansum
        # ignores them, so this RMSE is not a mean over the valid cells only.
        no_zero_ids = np.count_nonzero(whole_cnn)

        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)
        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        if np.random.random() > .9 or day_of_year == 353:  # Plot 10% of the times
            # One limit per plotted panel (CNN, TSIS, error). These values are
            # scalars, so they are put in a list instead of being added together.
            viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False,
                                         mincbar=[whole_mincbar, whole_mincbar, whole_mincbarerror],
                                         maxcbar=[whole_maxcbar, whole_maxcbar, whole_maxcbarerror])

            # ================== Displays CNN and TSIS with RMSE ================
            viz_obj.output_folder = join(output_imgs_folder, 'WholeOutput_CNN_TSIS')
            viz_obj.plot_2d_data_np_raw(
                [np.flip(whole_cnn, axis=0), np.flip(whole_y, axis=0), np.flip(error, axis=0)],
                var_names=[F"CNN INC {x}" for x in output_fields] +
                          [F"TSIS INC {x}" for x in output_fields] +
                          [F'RMSE {rmse_cnn:0.4f}'],
                file_name=F"WholeOutput_CNN_TSIS_{c_file}",
                rot_90=False,
                cols_per_row=3,
                cmap=cmocean.cm.algae,
                title=F"{output_fields} RMSE: {np.mean(rmse_cnn):0.5f}")
def singleModel(config):
    """
    Plot the feature maps of every convolutional layer of a trained network.

    The model is built from the configuration, its weights are loaded, and for
    every tile of ``rows`` x ``cols`` of every model file found in the input
    folder, the output of each layer whose name contains ``conv`` is evaluated
    and plotted with ``plot_intermediate_2dcnn_feature_map``.

    :param config: configuration dictionary with the prediction and training
        parameters (folders, field names, tile size, network type, weights).
    :raises ValueError: if the configured network type is not one of the
        supported ones.
    :return: None. The results are the images written to the output folder.
    """
    input_folder = config[PredictionParams.input_folder]
    rows = config[ProjTrainingParams.rows]
    cols = config[ProjTrainingParams.cols]
    model_field_names = config[ProjTrainingParams.fields_names]
    obs_field_names = config[ProjTrainingParams.fields_names_obs]
    output_fields = config[ProjTrainingParams.output_fields]
    run_name = config[TrainingParams.config_name]
    output_folder = join(config[PredictionParams.output_imgs_folder], 'MODEL_VISUALIZATION', run_name)
    norm_type = config[ProjTrainingParams.norm_type]

    model_weights_file = config[PredictionParams.model_weights_file]

    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        # Fail here with a clear message instead of an unbound 'model' below
        raise ValueError(F"Unsupported network type: {net_type}")

    create_folder(output_folder)
    plot_model(model, to_file=join(output_folder, F'running.png'), show_shapes=True)

    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # # All Number of parameters
    print(F' Number of parameters: {model.count_params()}')
    # Number of parameters by layer
    print(F' Number of parameters first CNN: {model.layers[1].count_params()}')

    # Example of plotting the filters of a single layer
    print("Printing layer names:")
    print_layer_names(model)

    # *********** Read files to predict***********
    # ========= Here you need to build your test input different in each project ====
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    var_field_names = config[ProjTrainingParams.fields_names_var]
    if len(var_field_names) > 0:
        input_fields_var = read_netcdf(var_file, var_field_names, z_layers)
    else:
        input_fields_var = []

    # The functions that evaluate the model up to each convolutional layer only
    # depend on the model, so they are built once instead of once per tile.
    inp = model.input  # input placeholder
    outputs = [layer.output for layer in model.layers[1:] if "conv" in layer.name]  # Displaying only conv layers
    functors = [K.function([inp], [out]) for out in outputs]

    np.random.shuffle(model_files)  # TODO this is only for testing
    for id_file, c_file in enumerate(model_files):
        # Find current and next date
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, model_field_names, z_layers)
        input_fields_obs = read_netcdf(obs_file, obs_field_names, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        # 891 and 1401 are the hardcoded dimensions of the preprocessed files
        for start_row in np.arange(0, 891 - rows, rows):
            for start_col in np.arange(0, 1401 - cols, cols):
                try:
                    input_data, y_data = generateXandY(input_fields_model, input_fields_obs, input_fields_var, output_field_increment,
                                                       model_field_names, obs_field_names, var_field_names, output_fields,
                                                       start_row, start_col, rows, cols, norm_type=norm_type)
                except Exception:
                    print(F"Failed for {c_file} row:{start_row} col:{start_col}")
                    continue

                X_nan = np.expand_dims(input_data, axis=0)
                Y_nan = np.expand_dims(y_data, axis=0)

                # ******************* Replacing nan values *********
                # The network receives 0 wherever the input is NaN
                X = np.nan_to_num(X_nan, nan=0)

                output_nn = model.predict(X, verbose=1)
                # The prediction is set to NaN wherever the target is NaN
                output_nn[np.isnan(Y_nan)] = np.nan

                # =========== Output from every intermediate conv layer
                print("Evaluating all intermediate layers")
                layer_outs = [func([X]) for func in functors]

                for layer_to_plot, layer_output in enumerate(outputs):
                    title = F'Layer {layer_to_plot}_{layer_output.name}. {c_file}_{start_row:03d}_{start_col:03d}'
                    file_name = F'{c_file}_{start_row:03d}_{start_col:03d}_lay_{layer_to_plot}'
                    plot_intermediate_2dcnn_feature_map(layer_outs[layer_to_plot][0],
                                                        input_data=X_nan,
                                                        desired_output_data=Y_nan,
                                                        nn_output_data=output_nn,
                                                        input_fields=model_field_names + obs_field_names + var_field_names,
                                                        title=title,
                                                        output_folder=output_folder,
                                                        file_name=file_name,
                                                        disp_images=False)
def plot_raw_data_new(proc_id):
    """
    Plots, for every configured field and day, the model state next to the TSIS increment.

    The folders, years, months, fields, layers and plot modes are read from the
    PreprocConfig file. The days of each month are distributed between
    processes: this call only handles the days whose position in the month
    modulo ``NUM_PROC`` equals ``proc_id``.

    For ``srfhgt`` the increment file is plotted as it is; for any other field
    the plotted increment is ``model - increment file``. The ``thknss`` fields
    are divided by 9806 before plotting.

    :param proc_id: id of the current process, in ``[0, NUM_PROC)``.
    :return: None. The results are the images written by the visualizer.
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    # Iterate current year
    for c_year in YEARS:
        # Iterate current month
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Reads the data (DA, Free run, and observations)
                increment_files, increment_paths = get_hycom_file_name(input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(input_folder_forecast, c_year, c_month, day_idx=2)
                obs_files, obs_paths = get_obs_file_names(input_folder_obs, c_year, c_month)
            except Exception:
                print(F"Failed to find any file for date {c_year}-{c_month}")
                continue

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # Makes regular expression of the current desired file.
                # Raw strings keep '\S' as a regex token instead of an invalid string escape.
                re_tsis = rf'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rf'020_archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rf'tsis_obs_gomb4_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                try:
                    # Gets the proper index of the file for the three cases
                    # (indexing with [0] fails when no file matches)
                    increment_file_idx = [i for i, file in enumerate(increment_files) if re.search(re_tsis, file) is not None][0]
                    hycom_file_idx = [i for i, file in enumerate(hycom_files) if re.search(re_hycom, file) is not None][0]
                    obs_file_idx = [i for i, file in enumerate(obs_files) if re.search(re_obs, file) is not None][0]
                except Exception as e:
                    print(F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}")
                    continue

                print(F" =============== Working with: {increment_files[increment_file_idx]} ============= ")
                print(F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}")
                print(F"Available fields on model: {read_field_names(hycom_paths[hycom_file_idx])}")
                # The dataset is only opened to list its variables, so it is closed right away
                with xr.open_dataset(obs_paths[obs_file_idx]) as ds:
                    obs_available_fields = list(ds.keys())
                print(F"Available fields on observations: {obs_available_fields}")

                model_state_np_fields = read_hycom_fields(hycom_paths[hycom_file_idx], fields, layers=layers)
                increment_np_fields = read_hycom_fields(increment_paths[increment_file_idx], fields, layers=layers)
                # NOTE(review): the observations are read but not used by the plot below
                obs_np_fields = read_netcdf(obs_paths[obs_file_idx], fields_obs)

                # Iterate over the fields defined in PreprocConfig and plot them
                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Values that are exactly 0 in the increment are treated as missing
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[c_field_name]

                    if c_field_name == "thknss":
                        divide = 9806
                        model_state_np_c_field = model_state_np_c_field / divide
                        increment_np_c_field = increment_np_c_field / divide

                    if c_field_name == "srfhgt":
                        inc = increment_np_c_field
                    else:
                        inc = (model_state_np_c_field - increment_np_c_field)

                    # ======================= Only Background state and TSIS increment ==================
                    try:
                        title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                        img_viz.plot_3d_data_np(
                            [model_state_np_c_field, inc],
                            var_names=['HYCOM', 'Increment (TSIS)'],
                            title=title,
                            file_name_prefix=F'ModelAndIncrement_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                            z_lavels_names=layers,
                            flip_data=True,
                            plot_mode=plot_modes[idx_field])
                    except Exception as e:
                        print(F"Failed for field: {c_field_name}: {e}")
def plot_raw_data(proc_id):
    """
    Makes images of the available data (observations, HYCOM model state and TSIS increment).

    The folders, years, months, fields, layers and plot modes are read from the
    PreprocConfig file. The days of each month are distributed between
    processes: this call only handles the days whose position in the month
    modulo ``NUM_PROC`` equals ``proc_id``.

    :param proc_id: id of the current process, in ``[0, NUM_PROC)``.
    :return: None. The results are the images written by the visualizer.
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    # Iterate current year
    for c_year in YEARS:
        # Iterate current month
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Reads the data (DA, Free run, and observations)
                increment_files, increment_paths = get_hycom_file_name(input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(input_folder_forecast, c_year, c_month)
                obs_files, obs_paths = get_obs_file_names(input_folder_obs, c_year, c_month)
            except Exception:
                print(F"Failed to find any file for date {c_year}-{c_month}")
                continue

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # Makes regular expression of the current desired file.
                # Raw strings keep '\S' as a regex token instead of an invalid string escape.
                re_tsis = rf'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rf'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rf'tsis_obs_ias_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                try:
                    # Gets the proper index of the file for the three cases
                    # (indexing with [0] fails when no file matches)
                    increment_file_idx = [i for i, file in enumerate(increment_files) if re.search(re_tsis, file) is not None][0]
                    hycom_file_idx = [i for i, file in enumerate(hycom_files) if re.search(re_hycom, file) is not None][0]
                    obs_file_idx = [i for i, file in enumerate(obs_files) if re.search(re_obs, file) is not None][0]
                except Exception as e:
                    print(F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}")
                    continue

                print(F" =============== Working with: {increment_files[increment_file_idx]} ============= ")
                print(F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}")
                increment_np_fields = read_hycom_fields(increment_paths[increment_file_idx], fields, layers=layers)
                model_state_np_fields = read_hycom_fields(hycom_paths[hycom_file_idx], fields, layers=layers)
                obs_np_fields = read_netcdf(obs_paths[obs_file_idx], fields_obs, layers=[0], rename_fields=fields)

                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Values that are exactly 0 in the increment are treated as missing
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[c_field_name]
                    obs_np_c_field = obs_np_fields[c_field_name]

                    title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                    # ======================= Observations, HYCOM and TSIS increment ==================
                    # The observations get an extra leading axis before being
                    # plotted together with the model and increment fields.
                    img_viz.plot_3d_data_np(
                        [np.expand_dims(obs_np_c_field, 0), model_state_np_c_field, increment_np_c_field],
                        var_names=[F'Observations', 'HYCOM', 'Increment (TSIS)'],
                        title=title,
                        file_name_prefix=F'Summary_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                        z_lavels_names=layers,
                        flip_data=True,
                        plot_mode=plot_modes[idx_field])
import matplotlib.pyplot as plt from inout.io_netcdf import read_netcdf import numpy as np import matplotlib.patches as patches from os.path import join import os input_folder = "/home/olmozavala/Dropbox/MyProjects/EOAS/COAPS/MURI_AI_Ocean/Data_Assimilation/HYCOM-TSIS/testdata" input_file = "model_2009_205.nc" data = read_netcdf(join(input_folder, input_file), ["temp"]) fig, ax = plt.subplots() ax.imshow(np.flip(data["temp"], axis=0)) print(data["temp"].shape) found = 0 while found < 20: row = np.random.randint(0, 891) col = np.random.randint(0, 1401) ldata = data["temp"][row:row + 160, col:col + 160] if len(ldata[ldata.mask]) == 0: # Create a Rectangle patch print(F"Adding at: row:{row}-{row+160} and col:{col}-{col+160}") if row > 445: rect = patches.Rectangle((col, int(np.abs(445 - row))), 160, 160, linewidth=1, edgecolor='r',
def test_model(config):
    """
    Runs the trained network on every 'model_*' file of the input folder, plots the denormalized
    inputs and outputs, and saves a CSV with the RMSE and prediction time of each processed file.

    :param config: dictionary-like configuration indexed with PredictionParams, ProjTrainingParams
        and TrainingParams keys (folders, field names, network type, weights file, normalization)
    :return: None. 'Global_RMSE_and_times.csv' is written inside <output_imgs_folder>/<run_name>;
        the figures are generated through EOAImageVisualizer objects configured with that folder.
    :raises ValueError: if the configured network type is not one of the types handled here
    """
    input_folder = config[PredictionParams.input_folder]
    output_folder = config[PredictionParams.output_folder]
    output_fields = config[ProjTrainingParams.output_fields]
    model_weights_file = config[PredictionParams.model_weights_file]
    output_imgs_folder = config[PredictionParams.output_imgs_folder]
    field_names_model = config[ProjTrainingParams.fields_names]
    field_names_obs = config[ProjTrainingParams.fields_names_obs]
    run_name = config[TrainingParams.config_name]
    norm_type = config[ProjTrainingParams.norm_type]

    output_imgs_folder = join(output_imgs_folder, run_name)
    create_folder(output_imgs_folder)

    # *********** Chooses the proper model ***********
    print('Reading model ....')
    net_type = config[ProjTrainingParams.network_type]
    if net_type == NetworkTypes.UNET or net_type == NetworkTypes.UNET_MultiStream:
        model = select_2d_model(config, last_activation=None)
    elif net_type == NetworkTypes.SimpleCNN_2:
        model = simpleCNN(config, nn_type="2d", hid_lay=2, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_4:
        model = simpleCNN(config, nn_type="2d", hid_lay=4, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_8:
        model = simpleCNN(config, nn_type="2d", hid_lay=8, out_lay=2)
    elif net_type == NetworkTypes.SimpleCNN_16:
        model = simpleCNN(config, nn_type="2d", hid_lay=16, out_lay=2)
    else:
        # Fail early with a clear message instead of an unbound 'model' a few lines below
        raise ValueError(F"Unsupported network type: {net_type}")

    plot_model(model, to_file=join(output_folder, F'running.png'), show_shapes=True)

    # *********** Reads the weights***********
    print('Reading weights ....')
    model.load_weights(model_weights_file)

    # *********** Read files to predict***********
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])

    z_layers = [0]
    var_file = join(input_folder, "cov_mat", "tops_ias_std.nc")
    field_names_std = config[ProjTrainingParams.fields_names_var]
    if len(field_names_std) > 0:
        input_fields_std = read_netcdf(var_file, field_names_std, z_layers)
    else:
        input_fields_std = []

    cmap_out = chooseCMAP(output_fields)
    cmap_model = chooseCMAP(field_names_model)
    cmap_obs = chooseCMAP(field_names_obs)
    cmap_std = chooseCMAP(field_names_std)

    # Only the files that were actually predicted are stored, so that every column of the
    # summary has the same length even when a file is skipped.
    processed_files = []
    all_whole_mean_times = []
    all_whole_sum_times = []
    all_whole_rmse = []
    perc_ocean = .01

    # np.random.shuffle(model_files)  # TODO this is only for testing
    for c_file in model_files:
        # The date is taken from the file name: model_<year>_<day_of_year>.nc
        year = int(c_file.split('_')[1])
        day_of_year = int(c_file.split('_')[2].split('.')[0])

        model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
        inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
        obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, field_names_model, z_layers)
        input_fields_obs = read_netcdf(obs_file, field_names_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, output_fields, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        this_file_times = []
        try:
            # NOTE(review): grows and gcols are not defined inside this function; presumably
            # they are module-level globals -- confirm.
            input_data, y_data = generateXandY(input_fields_model, input_fields_obs, input_fields_std,
                                               output_field_increment, field_names_model, field_names_obs,
                                               field_names_std, output_fields, 0, 0, grows, gcols,
                                               norm_type=norm_type, perc_ocean=perc_ocean)
        except Exception as e:
            # Without X and Y there is nothing to predict. Continuing would either fail or
            # silently reuse the arrays of the previous file, so the file is skipped.
            print(F"Exception {e} (skipping file {c_file})")
            continue

        # ******************* Replacing nan values *********
        # NaNs become 0 in the input and -0.5 in the target. The -0.5 value is used below to
        # locate those (land) cells again.
        input_data_nans = np.isnan(input_data)
        input_data = np.nan_to_num(input_data, nan=0)
        y_data = np.nan_to_num(y_data, nan=-0.5)

        X = np.expand_dims(input_data, axis=0)
        Y = np.expand_dims(y_data, axis=0)

        # Make the prediction of the network
        start = time.time()
        output_nn_original = model.predict(X, verbose=1)
        toc = time.time() - start
        this_file_times.append(toc)

        # Make nan all values inside the land
        land_indexes = Y == -0.5
        output_nn_original[land_indexes] = np.nan

        # ==== Denormalizing all inputs and outputs
        denorm_cnn_output = denormalizeData(output_nn_original, output_fields, PreprocParams.type_inc, norm_type)
        denorm_y = denormalizeData(Y, output_fields, PreprocParams.type_inc, norm_type)
        input_types = ([PreprocParams.type_model for _ in input_fields_model]
                       + [PreprocParams.type_obs for _ in input_fields_obs]
                       + [PreprocParams.type_std for _ in input_fields_std])
        denorm_input = denormalizeData(input_data, field_names_model + field_names_obs + field_names_std,
                                       input_types, norm_type)

        # Recover the original land areas, they are lost after denormalization
        denorm_y[land_indexes] = np.nan

        # Remove the 'extra dimension'
        denorm_cnn_output = np.squeeze(denorm_cnn_output)
        denorm_y = np.squeeze(denorm_y)
        whole_cnn = denorm_cnn_output  # The 'whole prediction'
        whole_y = denorm_y  # The 'whole target'

        if len(denorm_cnn_output.shape) == 2:
            # With a single output field the arrays are 2D; a last axis is added so they can be
            # concatenated with the inputs for plotting
            denorm_cnn_output = np.expand_dims(denorm_cnn_output, axis=2)
            denorm_y = np.expand_dims(denorm_y, axis=2)

        # ================== DISPLAYS ALL INPUTS AND OUTPUTS DENORMALIZED ===================
        # Adding back mask to all the input variables
        denorm_input[input_data_nans] = np.nan

        # ======= Computes the error and RMSE of the whole output
        # NOTE(review): the error is computed before the unit conversion below, while the
        # fields plotted next to it are converted -- confirm this is intended.
        error = whole_y - whole_cnn
        # NOTE(review): np.count_nonzero also counts NaN cells, so the cells set to NaN above
        # are part of the RMSE denominator -- confirm this is intended.
        no_zero_ids = np.count_nonzero(whole_cnn)

        if output_fields[0] == 'srfhgt':  # This should only be for SSH to adjust the units
            # NOTE(review): this in-place division probably also rescales denorm_cnn_output
            # (it is expected to share memory with whole_cnn), whereas denorm_y keeps its
            # values because whole_y is rebound to a new array -- confirm.
            whole_cnn /= 9.81
            whole_y = np.array(whole_y) / 9.81

        rmse_cnn = np.sqrt(np.nansum((whole_y - whole_cnn)**2) / no_zero_ids)

        processed_files.append(c_file)
        all_whole_rmse.append(rmse_cnn)
        all_whole_mean_times.append(np.mean(np.array(this_file_times)))
        all_whole_sum_times.append(np.sum(np.array(this_file_times)))

        # ================== Displays inputs, target and CNN output ================
        viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder, disp_images=False)
        viz_obj.plot_2d_data_np_raw(
            np.concatenate((denorm_input.swapaxes(0, 2),
                            denorm_y.swapaxes(0, 2),
                            denorm_cnn_output.swapaxes(0, 2))),
            var_names=[F"in_model_{x}" for x in field_names_model]
                      + [F"in_obs_{x}" for x in field_names_obs]
                      + [F"in_var_{x}" for x in field_names_std]
                      + [F"out_inc_{x}" for x in output_fields]
                      + [F"cnn_{x}" for x in output_fields],
            file_name=F"Global_Input_and_CNN_{c_file}",
            rot_90=True,
            cmap=cmap_model + cmap_obs + cmap_std + cmap_out + cmap_out,
            cols_per_row=len(field_names_model),
            title=F"Input data: {field_names_model} and obs {field_names_obs}, increment {output_fields}, cnn {output_fields}")

        # ================== Displays CNN and TSIS with RMSE ================
        # Same colorbar limits for the CNN and TSIS panels, and [-1, 1] for the error panel
        minmax = getMinMaxPlot(output_fields)[0]
        viz_obj = EOAImageVisualizer(output_folder=output_imgs_folder,
                                     disp_images=False,
                                     mincbar=[minmax[0], minmax[0], -1],
                                     maxcbar=[minmax[1], minmax[1], 1])

        error_cmap = cmocean.cm.diff
        viz_obj.output_folder = join(output_imgs_folder, 'WholeOutput_CNN_TSIS')
        viz_obj.plot_2d_data_np_raw(
            [np.flip(whole_cnn, axis=0), np.flip(whole_y, axis=0), np.flip(error, axis=0)],
            var_names=[F"CNN increment SSH" for x in output_fields]
                      + [F"TSIS increment SSH" for x in output_fields]
                      + [F'TSIS - CNN \n (Mean RMSE {rmse_cnn:0.4f} m)'],
            file_name=F"Global_WholeOutput_CNN_TSIS_{c_file}",
            rot_90=False,
            cmap=cmap_out + cmap_out + [error_cmap],
            cols_per_row=3,
            title=F"SSH RMSE: {np.mean(rmse_cnn):0.5f} m.")

    print("DONE ALL FILES!!!!!!!!!!!!!")
    dic_summary = {
        "File": processed_files,
        "rmse": all_whole_rmse,
        "times mean": all_whole_mean_times,
        "times sum": all_whole_sum_times,
    }
    df = pd.DataFrame.from_dict(dic_summary)
    df.to_csv(join(output_imgs_folder, "Global_RMSE_and_times.csv"))
def data_gen_hycomtsis(paths,
                       file_names,
                       obs_path,
                       field_names,
                       obs_field_names,
                       output_field,
                       days_separation=1,
                       z_layers=None):
    """
    Infinite generator of (X, Y) examples for a CNN. X stacks the normalized model fields of one
    date and the normalized observation fields of the target date; Y is the output field of the
    model file that is 'days_separation' days later.

    :param paths: folder of each example, parallel to file_names
    :param file_names: model file names; the text between the first and second '.' must start
        with '<year>_<day_of_year>' (the target file is built as 'archv.<year>_<doy>_00.a')
    :param obs_path: root folder of the observations (files are searched in its 'WITH_PIES' subfolder)
    :param field_names: names of the model fields used as input
    :param obs_field_names: names of the observation fields used as input
    :param output_field: name of the model field used as target
    :param days_separation: number of days between the input date and the target date
    :param z_layers: layers requested to the readers; defaults to [0]
    :return: yields X with shape (1, 888, 1400, 8) and Y built from the first layer of the
        output field cropped to 888 x 1400 with a leading and a trailing unit axis. NaNs are
        replaced by -1 in both arrays.
    """
    if z_layers is None:
        z_layers = [0]

    ex_id = -1
    ids = np.arange(len(file_names))
    while True:
        # Sequential walk over 'ids'; the order is reshuffled every time all examples were visited
        if ex_id < (len(ids) - 1):
            ex_id += 1
        else:
            ex_id = 0
            np.random.shuffle(ids)

        # The example is selected through the (possibly shuffled) ids; indexing directly with
        # ex_id would ignore the shuffle
        c_id = ids[ex_id]
        c_path = paths[c_id]
        c_name = file_names[c_id]

        file_name = join(c_path, c_name)
        date_str = c_name.split('.')[1]  # This should be the date
        date_array = date_str.split('_')
        year = int(date_array[0])
        day_of_year = int(date_array[1])
        month, day_month = get_month_and_day_of_month_from_day_of_year(day_of_year, year)

        # Verify the file of the target date exists
        desired_date = date(year, month, day_month) + timedelta(days=days_separation)
        desired_day_of_year = get_day_of_year_from_month_and_day(desired_date.month, desired_date.day)
        desired_file_name = F'archv.{desired_date.year}_{desired_day_of_year:03d}_00.a'
        desired_file_path = join(c_path, desired_file_name)
        if not exists(desired_file_path):
            print(F"Warning! File {desired_file_name} doesn't exist")
            continue

        # TODO Hardcoded text "WITH_PIES"
        # Checked before reading the model files so they are not read just to be discarded
        obs_file_name = join(
            obs_path, "WITH_PIES",
            F"tsis_obs_ias_{desired_date.year}{desired_date.month:02d}{desired_date.day:02d}00.nc")
        if not exists(obs_file_name):
            print(F"Warning! Observation file doesn't exist {obs_file_name}")
            continue

        # *********************** Reading DA files **************************
        input_fields_da = read_hycom_output(file_name, field_names, layers=z_layers)
        output_field_da = read_hycom_output(desired_file_path, [output_field], layers=z_layers)

        # *********************** Reading Obs file **************************
        input_fields_obs = read_netcdf(obs_file_name, obs_field_names, z_layers)

        # ******************* Normalizing and Cropping Data *******************
        # TODO hardcoded dimensions and cropping code. The cube always has 8 channels, no matter
        # how many field names are received.
        rows = 888
        cols = 1400
        num_fields = 8
        data_cube = np.zeros((rows, cols, num_fields))

        id_field = 0
        for c_field in field_names:
            # First layer of each model field, normalized as (value - MIN) / MAX
            data_cube[:, :, id_field] = (input_fields_da[c_field][0, :rows, :cols]
                                         - MIN_DA[c_field]) / MAX_DA[c_field]
            id_field += 1

        for c_field in obs_field_names:
            # Only 2D observation fields are copied; any other field leaves its channel with zeros
            if len(input_fields_obs[c_field].shape) == 2:
                data_cube[:, :, id_field] = (input_fields_obs[c_field][:rows, :cols]
                                             - MIN_OBS[c_field]) / MAX_OBS[c_field]
            id_field += 1

        # ******************* Replacing nan values *********
        X = np.expand_dims(data_cube, axis=0)
        Y = np.expand_dims(np.expand_dims(output_field_da[output_field][0, :rows, :cols], axis=2), axis=0)
        X = np.nan_to_num(X, nan=-1)
        Y = np.nan_to_num(Y, nan=-1)

        yield X, Y
def _minmaxvar_file_paths(input_folder, c_file):
    """Builds the model, increment and observation paths that match a 'model_<year>_<doy>' file name."""
    year = int(c_file.split('_')[1])
    day_of_year = int(c_file.split('_')[2].split('.')[0])

    model_file = join(input_folder, F'model_{year}_{day_of_year:03d}.nc')
    inc_file = join(input_folder, F'increment_{year}_{day_of_year:03d}.nc')
    obs_file = join(input_folder, F'obs_{year}_{day_of_year:03d}.nc')
    return model_file, inc_file, obs_file


def _update_min_max(data, field_name, min_values, max_values):
    """Updates in place the running min and max of field_name with the NaN-aware extremes of data."""
    c_max = np.nanmax(data)
    c_min = np.nanmin(data)
    if c_max >= max_values[field_name]:
        max_values[field_name] = c_max
    if c_min <= min_values[field_name]:
        min_values[field_name] = c_min


def ComputeOverallMinMaxVar():
    """
    Computes the min, max, mean, variance and standard deviation of every model, observation and
    increment field over all the 'model_*' files (and their matching 'increment_*' and 'obs_*'
    files) of the preprocessed input folder, and saves them in 'MIN_MAX_MEAN_STD.csv'.

    The mean is the average of the per-file means. The variance is the average, over the files,
    of the mean squared difference between each file and that overall mean, which is why the
    files are read twice.

    :return: None. The results are printed and written to 'MIN_MAX_MEAN_STD.csv' in the current
        working directory.
    """
    config = get_training_2d()
    input_folder = config[ProjTrainingParams.input_folder_preproc]
    fields = config[ProjTrainingParams.fields_names]
    fields_obs = config[ProjTrainingParams.fields_names_obs]

    # The running extremes start at -inf/+inf so that any data value replaces them (starting the
    # max at 0 would be wrong for fields whose values are all negative).
    max_values_model = {field: -np.inf for field in fields}
    min_values_model = {field: np.inf for field in fields}
    max_values_obs = {field: -np.inf for field in fields_obs}
    min_values_obs = {field: np.inf for field in fields_obs}
    max_values_inc = {field: -np.inf for field in fields}
    min_values_inc = {field: np.inf for field in fields}

    mean_values_model = {field: 0 for field in fields}
    mean_values_obs = {field: 0 for field in fields_obs}
    mean_values_inc = {field: 0 for field in fields}

    var_values_model = {field: 0 for field in fields}
    var_values_obs = {field: 0 for field in fields_obs}
    var_values_inc = {field: 0 for field in fields}

    # These are the data assimilated files (sorting the list is enough to get them in order)
    all_files = os.listdir(input_folder)
    all_files.sort()
    model_files = np.array([x for x in all_files if x.startswith('model')])
    tot_files = len(model_files)
    z_layers = [0]

    # ======================= First pass: mean, min and max =======================
    for c_file in model_files:
        print(F"Working with {c_file}")
        model_file, inc_file, obs_file = _minmaxvar_file_paths(input_folder, c_file)

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        # =============== Model
        for c_field_name in fields:
            da_np_c_field = input_fields_model[c_field_name]
            mean_values_model[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            _update_min_max(da_np_c_field, c_field_name, min_values_model, max_values_model)

        # =============== Observations
        for c_field_name in fields_obs:
            da_np_c_field = input_fields_obs[c_field_name]
            # We needed to add this try because in some cases there are none observations, like in day 245
            try:
                mean_values_obs[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            except Exception:
                # Best effort: a file without observations does not contribute to the mean
                pass
            print(F' {c_file}:{c_field_name}: {mean_values_obs[c_field_name]}')
            _update_min_max(da_np_c_field, c_field_name, min_values_obs, max_values_obs)

        # =============== Increment
        for c_field_name in fields:
            da_np_c_field = output_field_increment[c_field_name]
            mean_values_inc[c_field_name] += np.nanmean(da_np_c_field) / tot_files
            _update_min_max(da_np_c_field, c_field_name, min_values_inc, max_values_inc)

    # ======================= Second pass: variance =======================
    print("=============================== Computing Variance....")
    for c_file in model_files:
        print(F"Working with {c_file}")
        model_file, inc_file, obs_file = _minmaxvar_file_paths(input_folder, c_file)

        # *********************** Reading files **************************
        input_fields_model = read_netcdf(model_file, fields, z_layers)
        input_fields_obs = read_netcdf(obs_file, fields_obs, z_layers)
        output_field_increment = read_netcdf(inc_file, fields, z_layers)

        # =============== Model
        for c_field_name in fields:
            da_np_c_field = input_fields_model[c_field_name]
            var_values_model[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_model[c_field_name])**2) / tot_files

        # =============== Observations
        for c_field_name in fields_obs:
            da_np_c_field = input_fields_obs[c_field_name]
            data = (da_np_c_field[:].filled(np.nan) - mean_values_obs[c_field_name])**2
            # Files where every value is NaN after filling the mask are ignored
            if np.logical_not(np.isnan(data)).any():
                var_values_obs[c_field_name] += np.nanmean(data) / tot_files

        # =============== Increment
        for c_field_name in fields:
            da_np_c_field = output_field_increment[c_field_name]
            var_values_inc[c_field_name] += np.nanmean(
                (da_np_c_field - mean_values_inc[c_field_name])**2) / tot_files

    # ======================= Saving the results =======================
    # The context manager closes the file even if formatting one of the rows fails
    with open("MIN_MAX_MEAN_STD.csv", 'w') as f:
        print("----------------- Model --------------------")
        text = F"TYPE,Field,MIN,MAX,MEAN,VARIANCE,STD\n"
        f.write(text)
        for c_field_name in fields:
            text = F"MODEL,{c_field_name}, {min_values_model[c_field_name]:0.6f}, {max_values_model[c_field_name]:0.6f}, " \
                   F" {mean_values_model[c_field_name]:0.6f}, {var_values_model[c_field_name]: 0.6f}, {np.sqrt(var_values_model[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)

        print("----------------- Observations --------------------")
        for c_field_name in fields_obs:
            text = F"OBS,{c_field_name}, {min_values_obs[c_field_name]:0.6f}, {max_values_obs[c_field_name]:0.6f}, " \
                   F" {mean_values_obs[c_field_name]:0.6f}, {var_values_obs[c_field_name]: 0.6f}, {np.sqrt(var_values_obs[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)

        print("----------------- Increment --------------------")
        for c_field_name in fields:
            text = F"INC,{c_field_name}, {min_values_inc[c_field_name]:0.6f}, {max_values_inc[c_field_name]:0.6f}," \
                   F" {mean_values_inc[c_field_name]:0.6f}, {var_values_inc[c_field_name]: 0.6f}, {np.sqrt(var_values_inc[c_field_name]): 0.6f}\n"
            f.write(text)
            print(text)