def get_hycom_file_name(input_folder, year, month, day_idx=1):
    """
    Lists the files of ``input_folder`` whose name encodes a day of the year
    that belongs to the requested month.

    The day of the year is read from the ``day_idx``-th token obtained when
    the file name is split by '_'. Entries whose name has too few tokens, or
    whose token is not an integer, are skipped so that one unrelated file in
    the folder does not abort the listing of the whole month.

    :param input_folder: Folder to list (not recursive). Only regular files are considered.
    :param year: Currently unused (files are searched directly in ``input_folder``,
                 not in a per-year subfolder). Kept so existing callers keep working.
    :param month: Month forwarded to ``get_days_from_month`` to obtain the valid days of the year.
    :param day_idx: Indicates the position of the day of the year in the split by '_' of the file name
    :return: str array [file names], str array [file paths], both sorted by file name
    """
    _, days_of_year = get_days_from_month(month)
    folder = input_folder

    selected_files = []
    for c_file in os.listdir(folder):
        if not isfile(join(folder, c_file)):
            continue
        try:
            c_day_of_year = int(c_file.split('_')[day_idx])
        except (IndexError, ValueError):
            # The name does not follow the '<prefix>_<day>_...' layout; ignore it.
            continue
        if c_day_of_year in days_of_year:
            selected_files.append(c_file)

    selected_files.sort()
    return selected_files, [join(folder, c_file) for c_file in selected_files]
def img_generation_hycom(proc_id):
    """
    Makes images of the HYCOM model fields, one figure per field and day.

    The configuration (input folder, years, months, fields, layers and plot
    modes) is read from ``get_preproc_config()``. The days of every month are
    distributed among processes: a day is handled by this call only when its
    0-based position in the month modulo ``NUM_PROC`` equals ``proc_id``.

    :param proc_id: Index of the current process, compared against ``day position % NUM_PROC``.
    :return: None. Images are written through ``EOAImageVisualizer``.
    """
    config = get_preproc_config()
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Names and full paths of the model files of this month
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
            except Exception as e:
                # Best effort: report the cause and go on with the next month
                print(F"Failed to find any file for date {c_year}-{c_month}: {e}")
                continue

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # c_day_of_month is a 0-based index; the calendar day is one more
                c_day = c_day_of_month + 1

                # Regular expression of the current desired file (raw string, so
                # that '\S' is not treated as a string escape sequence)
                re_hycom = rf'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                try:
                    # Index of the first file that matches the expression
                    hycom_file_idx = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_hycom, c_file) is not None
                    ][0]
                except IndexError as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {c_day} doesn't exist: {e}"
                    )
                    continue

                print(
                    F" =============== Working with: {hycom_files[hycom_file_idx]} ============= "
                )
                print(
                    F"Available fields: {read_field_names(hycom_paths[hycom_file_idx])}"
                )
                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)

                for idx_field, c_field_name in enumerate(fields):
                    model_state_np_c_field = model_state_np_fields[c_field_name]

                    title = F"{c_field_name} {c_year}_{c_month:02d}_{c_day:02d}"
                    # ======================= Only HYCOM ==================
                    # The file name uses the same 1-based day as the title
                    img_viz.plot_3d_data_np(
                        [model_state_np_c_field],
                        var_names=['HYCOM'],
                        title=title,
                        file_name_prefix=
                        F'HYCOM_{c_field_name}_{c_year}_{c_month:02d}_{c_day:02d}',
                        z_lavels_names=layers,
                        flip_data=True,
                        plot_mode=plot_modes[idx_field])
def plot_raw_data_new(proc_id):
    """
    Plots, for every configured field and day, the model state next to the
    TSIS increment.

    For each day it requires an increment file ('incupd...'), a model file
    ('020_archv...') and an observation file ('tsis_obs_gomb4_...'); days
    where any of them is missing are skipped. The fields to read are taken
    from field_names and fields_names_obs of the PreprocConfig file.

    The days of every month are distributed among processes: a day is handled
    by this call only when its 0-based position in the month modulo
    ``NUM_PROC`` equals ``proc_id``.

    :param proc_id: Index of the current process, compared against ``day position % NUM_PROC``.
    :return: None. Images are written through ``EOAImageVisualizer``.
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Reads the file lists (increment, model and observations)
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                # Model file names start with '020_', which shifts the position
                # of the day-of-year token by one
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month, day_idx=2)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                # Best effort: report the cause and go on with the next month
                print(F"Failed to find any file for date {c_year}-{c_month}: {e}")
                continue

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # Regular expressions of the desired files (raw strings, so that
                # '\S' is not treated as a string escape sequence)
                re_tsis = rf'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rf'020_archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rf'tsis_obs_gomb4_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                try:
                    # Index of the first matching file for each of the three cases
                    increment_file_idx = [
                        i for i, c_file in enumerate(increment_files)
                        if re.search(re_tsis, c_file) is not None
                    ][0]
                    hycom_file_idx = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_hycom, c_file) is not None
                    ][0]
                    obs_file_idx = [
                        i for i, c_file in enumerate(obs_files)
                        if re.search(re_obs, c_file) is not None
                    ][0]
                except IndexError as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}"
                    )
                    continue

                print(
                    F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                )
                print(
                    F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}"
                )
                print(
                    F"Available fields on model: {read_field_names(hycom_paths[hycom_file_idx])}"
                )
                # The dataset is only opened to list its variables; close it right away
                with xr.open_dataset(obs_paths[obs_file_idx]) as ds:
                    print(F"Available fields on observations: {list(ds.keys())}")

                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)
                increment_np_fields = read_hycom_fields(
                    increment_paths[increment_file_idx], fields, layers=layers)
                # NOTE(review): the observations are read but not plotted yet.
                obs_np_fields = read_netcdf(obs_paths[obs_file_idx], fields_obs)

                # Iterate over the fields defined in PreprocConfig and plot them
                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Exact zeros of the increment are replaced (in place) by NaN
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[c_field_name]

                    if c_field_name == "thknss":
                        # presumably converts the thickness from pressure units
                        # to meters (9806 ~ g * rho) -- TODO confirm
                        divide = 9806
                        model_state_np_c_field = model_state_np_c_field / divide
                        increment_np_c_field = increment_np_c_field / divide

                    # 'srfhgt' is plotted as stored in the increment file; for
                    # any other field the plotted value is model minus increment file
                    if c_field_name == "srfhgt":
                        inc = increment_np_c_field
                    else:
                        inc = model_state_np_c_field - increment_np_c_field

                    # ======================= Only Background state and TSIS increment ==================
                    try:
                        title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                        img_viz.plot_3d_data_np(
                            [model_state_np_c_field, inc],
                            var_names=['HYCOM', 'Increment (TSIS)'],
                            title=title,
                            file_name_prefix=
                            F'ModelAndIncrement_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                            z_lavels_names=layers,
                            flip_data=True,
                            plot_mode=plot_modes[idx_field])
                    except Exception as e:
                        # A failing field must not prevent plotting the others
                        print(F"Failed for field: {c_field_name}: {e}")
def compute_consecutive_days_difference():
    """
    Computes the difference between consecutive days on the hycom files and
    plots it as time series.

    For every day that has a file for itself and for the previous day of the
    year, and for every configured field, the stored value is the absolute
    value of the NaN-ignoring mean of ``previous_day - current_day``. For each
    year one plot with all the fields and one plot per field are generated.

    NOTE(review): the file name prefixes of the plots do not include the year,
    so with several years each one reuses the same prefixes.

    :return: None. Images are written through ``EOAImageVisualizer``.
    """
    config = get_preproc_config()
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    for c_year in YEARS:
        # Accumulators for the current year
        diff_per_field = {field: [] for field in fields}
        days_with_data = []

        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Names and full paths of the hycom files of this month
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
            except Exception as e:
                # Best effort: report the cause and go on with the next month
                print(F"Failed to find any file for date {c_year}-{c_month}: {e}")
                continue

            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                print(
                    F"---------- Year {c_year} day: {c_day_of_year} --------------"
                )
                # Regular expressions of the current and previous day (raw
                # strings, so that '\S' is not treated as a string escape).
                # The file list only holds the files of the current month, so
                # the first day of each month finds no previous day and is skipped.
                re_hycom = rf'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom_prev = rf'archv.{c_year}_{(c_day_of_year-1):03d}\S*.a'

                try:
                    hycom_file_idx = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_hycom, c_file) is not None
                    ][0]
                    hycom_file_idx_prev = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_hycom_prev, c_file) is not None
                    ][0]
                except IndexError as e:
                    # c_day_of_month is a 0-based index; report the calendar day
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {c_day_of_month + 1} (and prev day) don't exist: {e}"
                    )
                    continue

                days_with_data.append(c_day_of_year)
                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)
                model_state_np_fields_prev = read_hycom_fields(
                    hycom_paths[hycom_file_idx_prev], fields, layers=layers)

                # Computes the difference between consecutive days from the desired fields
                for c_field_name in fields:
                    model_state_np_c_field = model_state_np_fields[c_field_name]
                    model_state_np_c_field_prev = model_state_np_fields_prev[c_field_name]
                    c_diff = np.abs(
                        np.nanmean(model_state_np_c_field_prev -
                                   model_state_np_c_field))
                    diff_per_field[c_field_name].append(c_diff)

        # Plots the differences between consecutive days. For all the fields together.
        img_viz.plot_1d_data_np(
            days_with_data,
            list(diff_per_field.values()),
            title='Difference between days',
            labels=fields,
            file_name_prefix='HYCOM_Diff_Between_Days',
            wide_ratio=4)

        # Plots the differences between consecutive days. Separated by fields
        for field, c_field_diffs in diff_per_field.items():
            img_viz.plot_1d_data_np(
                days_with_data,
                [c_field_diffs],
                title=F'Difference between days {field}',
                labels=[field],
                file_name_prefix=F'HYCOM_Diff_Between_Days_{field}',
                wide_ratio=4)
def plot_raw_data(proc_id):
    """
    Makes summary images of the available data: observations, HYCOM model
    state and TSIS increment, one figure per field and day.

    For each day it requires an increment file ('incupd...'), a model file
    ('archv...') and an observation file ('tsis_obs_ias_...'); days where any
    of them is missing are skipped.

    The days of every month are distributed among processes: a day is handled
    by this call only when its 0-based position in the month modulo
    ``NUM_PROC`` equals ``proc_id``.

    :param proc_id: Index of the current process, compared against ``day position % NUM_PROC``.
    :return: None. Images are written through ``EOAImageVisualizer``.
    """
    config = get_preproc_config()
    input_folder_tsis = config[PreprocParams.input_folder_tsis]
    input_folder_forecast = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.imgs_output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    fields_obs = config[PreprocParams.fields_names_obs]
    plot_modes = config[PreprocParams.plot_modes_per_field]
    layers = config[PreprocParams.layers_to_plot]

    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            try:
                _, days_of_year = get_days_from_month(c_month)
                # Reads the file lists (increment, model and observations)
                increment_files, increment_paths = get_hycom_file_name(
                    input_folder_tsis, c_year, c_month)
                hycom_files, hycom_paths = get_hycom_file_name(
                    input_folder_forecast, c_year, c_month)
                obs_files, obs_paths = get_obs_file_names(
                    input_folder_obs, c_year, c_month)
            except Exception as e:
                # Best effort: report the cause and go on with the next month
                print(F"Failed to find any file for date {c_year}-{c_month}: {e}")
                continue

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # Regular expressions of the desired files (raw strings, so that
                # '\S' is not treated as a string escape sequence)
                re_tsis = rf'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_hycom = rf'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rf'tsis_obs_ias_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'

                try:
                    # Index of the first matching file for each of the three cases
                    increment_file_idx = [
                        i for i, c_file in enumerate(increment_files)
                        if re.search(re_tsis, c_file) is not None
                    ][0]
                    hycom_file_idx = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_hycom, c_file) is not None
                    ][0]
                    obs_file_idx = [
                        i for i, c_file in enumerate(obs_files)
                        if re.search(re_obs, c_file) is not None
                    ][0]
                except IndexError as e:
                    print(
                        F"ERROR: The file for date {c_year} - {c_month} - {(c_day_of_month+1)} doesn't exist: {e}"
                    )
                    continue

                print(
                    F" =============== Working with: {increment_files[increment_file_idx]} ============= "
                )
                print(
                    F"Available fields on increment: {read_field_names(increment_paths[increment_file_idx])}"
                )
                increment_np_fields = read_hycom_fields(
                    increment_paths[increment_file_idx], fields, layers=layers)
                model_state_np_fields = read_hycom_fields(
                    hycom_paths[hycom_file_idx], fields, layers=layers)
                # Observations are read only for layer 0 and renamed to the model field names
                obs_np_fields = read_netcdf(obs_paths[obs_file_idx],
                                            fields_obs,
                                            layers=[0],
                                            rename_fields=fields)

                for idx_field, c_field_name in enumerate(fields):
                    increment_np_c_field = increment_np_fields[c_field_name]
                    # Exact zeros of the increment are replaced (in place) by NaN
                    nan_indx = increment_np_c_field == 0
                    increment_np_c_field[nan_indx] = np.nan
                    model_state_np_c_field = model_state_np_fields[c_field_name]
                    obs_np_c_field = obs_np_fields[c_field_name]

                    title = F"{c_field_name} {c_year}_{c_month:02d}_{(c_day_of_month+1):02d}"
                    # ======================= Observations, HYCOM and TSIS increment ==================
                    # The observations get a leading axis of size 1 to be
                    # stacked like the other two inputs
                    img_viz.plot_3d_data_np(
                        [
                            np.expand_dims(obs_np_c_field, 0),
                            model_state_np_c_field,
                            increment_np_c_field
                        ],
                        var_names=['Observations', 'HYCOM', 'Increment (TSIS)'],
                        title=title,
                        file_name_prefix=
                        F'Summary_{c_field_name}_{c_year}_{c_month:02d}_{(c_day_of_month+1):02d}',
                        z_lavels_names=layers,
                        flip_data=True,
                        plot_mode=plot_modes[idx_field])
def preproc_data(proc_id):
    """
    This function preprocess the desired data. It does the following:
        1) Looks for dates where there is 'increment', model, and observations data.
        2) Saves the files on the same folder with only the 'desired' fields in netcdf format

    Per day, three output files are attempted in ``output_folder``:
    ``increment_<year>_<day of year>.nc``, ``model_<year>_<day of year>.nc``
    and ``obs_<year>_<day of year>.nc``. When the increment step fails the
    day is skipped; failures of the model or observation steps are only
    reported.

    The days of every month are distributed among processes: a day is handled
    by this call only when its 0-based position in the month modulo
    ``NUM_PROC`` equals ``proc_id``.

    :param proc_id: Index of the current process, compared against ``day position % NUM_PROC``.
    :return: None
    """
    print("Preprocessing data....")
    config = get_preproc_config()
    input_folder_increment = config[PreprocParams.input_folder_tsis]
    input_folder_model = config[PreprocParams.input_folder_hycom]
    input_folder_obs = config[PreprocParams.input_folder_obs]
    output_folder = config[PreprocParams.output_folder]
    YEARS = config[PreprocParams.YEARS]
    MONTHS = config[PreprocParams.MONTHS]
    fields = config[PreprocParams.fields_names]
    obs_fields = config[PreprocParams.fields_names_obs]
    layers = config[PreprocParams.layers_to_plot]

    # NOTE(review): img_viz is never used below. It is kept in case the
    # constructor has side effects (e.g. creating output_folder) -- confirm
    # and remove if it has none.
    img_viz = EOAImageVisualizer(output_folder=output_folder, disp_images=False)

    for c_year in YEARS:
        for c_month in MONTHS:
            print(
                F"=============== Year: {c_year} Month: {c_month} ==========="
            )
            _, days_of_year = get_days_from_month(c_month)

            # Reads all the files for this month
            da_files, da_paths = get_hycom_file_name(input_folder_increment,
                                                     c_year, c_month)
            hycom_files, hycom_paths = get_hycom_file_name(
                input_folder_model, c_year, c_month)
            obs_files, obs_paths = get_obs_file_names(input_folder_obs,
                                                      c_year, c_month)

            # This for is fixed to be able to run in parallel
            for c_day_of_month, c_day_of_year in enumerate(days_of_year):
                if (c_day_of_month % NUM_PROC) != proc_id:
                    continue

                # Regular expressions of the desired files (raw strings, so that
                # '\S' is not treated as a string escape sequence)
                re_increment = rf'incupd.{c_year}_{c_day_of_year:03d}\S*.a'
                re_model = rf'archv.{c_year}_{c_day_of_year:03d}\S*.a'
                re_obs = rf'tsis_obs_ias_{c_year}{c_month:02d}{c_day_of_month+1:02d}\S*.nc'
                # c_day_of_month is a 0-based index; messages show the calendar day
                c_date = F"{c_year}-{c_month}-{c_day_of_month + 1}"

                # --------- Preprocessing Increment (TSIS) -------------
                try:
                    da_file_idx = [
                        i for i, c_file in enumerate(da_files)
                        if re.search(re_increment, c_file) is not None
                    ][0]
                    print(
                        F" =============== Working with: {da_files[da_file_idx]} Proc_id={proc_id} ============= "
                    )
                    da_np_fields = read_hycom_fields(da_paths[da_file_idx],
                                                     fields,
                                                     layers=layers)
                    hycom_file_idx = [
                        i for i, c_file in enumerate(hycom_files)
                        if re.search(re_model, c_file) is not None
                    ][0]
                    hycom_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)
                    proc_increment_data(
                        da_np_fields, hycom_np_fields, fields,
                        join(output_folder,
                             F"increment_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: Increment file for date {c_date} ({re_increment}) doesn't exist: {e}"
                    )
                    # Without the increment (and its model file) the rest of
                    # the day cannot be processed: go to the next day.
                    continue

                # --------- Preprocessing HYCOM data -------------
                try:
                    # hycom_file_idx was already resolved by the increment step
                    print(
                        F" --------------- Working with: {hycom_files[hycom_file_idx]} ------------- "
                    )
                    # Read again instead of reusing the arrays of the previous
                    # step, in case proc_increment_data modified them in place
                    # -- TODO confirm whether the second read is needed.
                    hycom_np_fields = read_hycom_fields(
                        hycom_paths[hycom_file_idx], fields, layers=layers)
                    proc_model_data(
                        hycom_np_fields, fields,
                        join(output_folder,
                             F"model_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: HYCOM file for date {c_date} ({re_model}) doesn't exist: {e}"
                    )

                # --------- Preprocessing observed data -------------
                try:
                    obs_file_idx = [
                        i for i, c_file in enumerate(obs_files)
                        if re.search(re_obs, c_file) is not None
                    ][0]
                    print(
                        F" --------------- Working with: {obs_files[obs_file_idx]} ------------- "
                    )
                    obs_ds = xr.load_dataset(obs_paths[obs_file_idx])

                    # Builds a dataset that only contains the desired fields
                    for id_field, c_obs_field in enumerate(obs_fields):
                        if id_field == 0:
                            preproc_obs_ds = obs_ds[c_obs_field].to_dataset()
                        else:
                            preproc_obs_ds = preproc_obs_ds.merge(
                                obs_ds[c_obs_field].to_dataset())

                    # --------------- Here we add the fields from the profiles as gridded data -----------
                    temp_group = 0
                    saln_group = 1
                    # The new grids take the shape of the last desired field
                    sst_p = np.zeros(preproc_obs_ds[c_obs_field].values.shape)
                    sss_p = np.zeros(sst_p.shape)
                    profiles = obs_ds.val
                    tot_profiles = profiles.shape[0]
                    obs_groups = obs_ds.ob_grp_present
                    # presumably grid indices (column / row) of each profile;
                    # they are truncated to int below -- TODO confirm
                    lons_i = obs_ds.grdi.values[:, 0, 0]
                    lats_i = obs_ds.grdj.values[:, 0, 0]

                    for i_group, c_type in enumerate(obs_groups):
                        if c_type == saln_group or c_type == temp_group:
                            for c_profile_i in range(tot_profiles):
                                # Last entry along the second axis of the profile
                                c_data = profiles[c_profile_i, -1, i_group]
                                c_row = int(lats_i[c_profile_i])
                                c_col = int(lons_i[c_profile_i])
                                if c_type == saln_group:
                                    sss_p[c_row, c_col] = c_data
                                if c_type == temp_group:
                                    sst_p[c_row, c_col] = c_data

                    print(F"Max value: {np.amax(sst_p)}")
                    print(F"Max value s: {np.amax(sss_p)}")
                    preproc_obs_ds['sst_p'] = xr.DataArray(sst_p,
                                                           dims=['yc', 'xc'])
                    preproc_obs_ds['sss_p'] = xr.DataArray(sss_p,
                                                           dims=['yc', 'xc'])
                    preproc_obs_ds.to_netcdf(
                        join(output_folder,
                             F"obs_{c_year}_{c_day_of_year:03d}.nc"))
                except Exception as e:
                    print(
                        F"Warning: OBS file for date {c_date} doesn't exist: {e}"
                    )