def organize_forcing(self, var_name='soil_moisture'):

    logging.info(' ----> Organize soil moisture forcing ... ')

    time_range = self.time_range
    file_data_geo = self.file_data_geo
    file_data_basin = self.file_data_basin
    file_path_src_list = self.file_path_src_list
    file_path_ancillary_list = self.file_path_ancillary_list

    for (group_key_basin, group_basin), (group_key_geo, group_geo), group_file, group_ancillary in zip(
            file_data_basin.items(), file_data_geo.items(),
            file_path_src_list.values(), file_path_ancillary_list.values()):

        logging.info(' -----> Alert Area ' + group_key_basin + ' ... ')

        basin_list = list(group_basin.keys())

        geo_mask_ref = group_geo
        geo_x_ref = group_geo['west_east']
        geo_y_ref = group_geo['south_north']

        basin_collections = {}
        file_ancillary_collections = {}
        if basin_list:

            for basin_name in basin_list:

                logging.info(' ------> BasinName ' + basin_name + ' ... ')

                file_basin_geo = group_basin[basin_name]
                file_basin_list = group_file[basin_name]
                file_ancillary_list = group_ancillary[basin_name]

                for time_step, file_basin_step, file_ancillary_step in zip(
                        time_range, file_basin_list, file_ancillary_list):

                    logging.info(' -------> TimeStep: ' + str(time_step) + ' ... ')

                    if self.flag_ancillary_updating:
                        if os.path.exists(file_ancillary_step):
                            os.remove(file_ancillary_step)

                    if not os.path.exists(file_ancillary_step):

                        if file_basin_step.endswith(self.file_extension_zip):
                            file_basin_out = change_extension(file_basin_step, self.file_extension_unzip)
                        else:
                            file_basin_out = file_basin_step

                        if os.path.exists(file_basin_step):

                            # Unzip the source file only when it is actually compressed
                            if file_basin_step.endswith(self.file_extension_zip):
                                unzip_filename(file_basin_step, file_basin_out)

                            # Read the total volume and derive the maximum retention from the curve number
                            data_vtot = read_file_binary(
                                file_basin_out, data_geo=file_basin_geo[self.var_name_terrain].values)
                            data_vmax = convert_cn2s(
                                file_basin_geo[self.var_name_curve_number].values,
                                file_basin_geo[self.var_name_terrain].values)

                            # Compute relative soil moisture and flag channel-network cells with -1
                            data_sm = data_vtot / data_vmax
                            data_sm[file_basin_geo[self.var_name_channels_network].values == 1] = -1

                            da_sm_base = create_darray_2d(
                                data_sm,
                                file_basin_geo[self.var_name_x], file_basin_geo[self.var_name_y],
                                coord_name_x='west_east', coord_name_y='south_north',
                                dim_name_x='west_east', dim_name_y='south_north')

                            # Interpolate the basin grid over the alert-area reference grid
                            da_sm_interp = da_sm_base.interp(
                                south_north=geo_y_ref, west_east=geo_x_ref, method='nearest')

                            if time_step not in basin_collections:
                                basin_collections[time_step] = [da_sm_interp]
                                file_ancillary_collections[time_step] = [file_ancillary_step]
                            else:
                                data_tmp = basin_collections[time_step]
                                data_tmp.append(da_sm_interp)
                                basin_collections[time_step] = data_tmp

                                file_tmp = file_ancillary_collections[time_step]
                                file_tmp.append(file_ancillary_step)
                                file_tmp = list(set(file_tmp))
                                file_ancillary_collections[time_step] = file_tmp

                            logging.info(' -------> TimeStep: ' + str(time_step) + ' ... DONE')

                        else:
                            logging.info(' -------> TimeStep: ' + str(time_step) + ' ... FAILED')
                            logging.warning(' ==> File: ' + file_basin_step + ' does not exist')
                    else:
                        logging.info(' -------> TimeStep: ' + str(time_step) + ' ... PREVIOUSLY DONE')

                logging.info(' ------> BasinName ' + basin_name + ' ... DONE')

            logging.info(' -----> Alert Area ' + group_key_basin + ' ... DONE')
        else:
            logging.info(' -----> Alert Area ' + group_key_basin + ' ... SKIPPED')
            logging.warning(' ==> Datasets are not defined')

        logging.info(' -----> Compose grid datasets from basins to alert area domain ... ')
        for (time_step, data_list), file_path_ancillary in zip(
                basin_collections.items(), file_ancillary_collections.values()):

            logging.info(' ------> TimeStep: ' + str(time_step) + ' ... ')

            if isinstance(file_path_ancillary, list) and len(file_path_ancillary) == 1:
                file_path_ancillary = file_path_ancillary[0]
            else:
                logging.error(' ===> Soil moisture ancillary file is not correctly defined.')
                raise IOError('Ancillary file is not unique')

            if self.flag_ancillary_updating:
                if os.path.exists(file_path_ancillary):
                    os.remove(file_path_ancillary)

            if not os.path.exists(file_path_ancillary):

                logging.info(' -------> Merge grid datasets ... ')

                # Merge the basin grids over the alert-area domain, keeping finite values
                array_merge = np.zeros([geo_mask_ref.values.shape[0] * geo_mask_ref.values.shape[1]])
                array_merge[:] = np.nan
                for data_step in data_list:
                    array_values = data_step.values.ravel()
                    idx_finite = np.isfinite(array_values)
                    array_merge[idx_finite] = array_values[idx_finite]

                grid_merge = np.reshape(
                    array_merge, [geo_mask_ref.values.shape[0], geo_mask_ref.values.shape[1]])

                # Void the channel-network cells (flagged with -1), fill the remaining undefined
                # cells inside the domain with the domain average, then mask cells outside the
                # domain and re-void the channel-network cells
                idx_choice = np.where(grid_merge == -1)
                grid_merge[idx_choice[0], idx_choice[1]] = np.nan
                idx_filter = np.where((geo_mask_ref.values == 1) & (np.isnan(grid_merge)))
                grid_merge[idx_filter[0], idx_filter[1]] = np.nanmean(grid_merge)
                grid_merge[(geo_mask_ref.values == 0)] = np.nan
                grid_merge[idx_choice[0], idx_choice[1]] = np.nan

                logging.info(' -------> Merge grid datasets ... DONE')

                logging.info(' -------> Save grid datasets ... ')
                dset_merge = create_dset(
                    grid_merge, geo_mask_ref.values, geo_x_ref.values, geo_y_ref.values,
                    var_data_time=time_step, var_data_name=var_name,
                    var_geo_name='mask', var_data_attrs=None, var_geo_attrs=None,
                    coord_name_x='longitude', coord_name_y='latitude', coord_name_time='time',
                    dim_name_x='west_east', dim_name_y='south_north', dim_name_time='time',
                    dims_order_2d=None, dims_order_3d=None)

                folder_name_ancillary, file_name_ancillary = os.path.split(file_path_ancillary)
                make_folder(folder_name_ancillary)

                if file_path_ancillary.endswith('.nc'):
                    write_dset(
                        file_path_ancillary, dset_merge,
                        dset_mode='w', dset_engine='h5netcdf', dset_compression=0,
                        dset_format='NETCDF4', dim_key_time='time', no_data=-9999.0)
                    logging.info(' -------> Save grid datasets ... DONE. [NETCDF]')
                elif file_path_ancillary.endswith('.tiff'):
                    save_file_tiff(
                        file_path_ancillary,
                        np.flipud(dset_merge[var_name].values), geo_x_ref.values, np.flipud(geo_y_ref.values),
                        file_metadata=self.file_metadata, file_epsg_code=self.file_epsg_code)
                    logging.info(' -------> Save grid datasets ... DONE. [GEOTIFF]')
                else:
                    logging.info(' -------> Save grid datasets ... FAILED')
                    logging.error(' ===> Filename format is not allowed')
                    raise NotImplementedError('Format is not implemented yet')

                self.file_path_processed.append(file_path_ancillary)

                logging.info(' ------> TimeStep: ' + str(time_step) + ' ... DONE')
            else:
                logging.info(' ------> TimeStep: ' + str(time_step) + ' ... PREVIOUSLY DONE')

        logging.info(' -----> Compose grid datasets from basins to alert area domain ... DONE')

    logging.info(' ----> Organize soil moisture forcing ... DONE')
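
# A minimal sketch of the curve-number conversion assumed for convert_cn2s above:
# the standard SCS relation S = 25400 / CN - 254 [mm], masked outside the terrain
# domain. The function name, the no-data convention and the guards are illustrative;
# the actual helper may differ.
import numpy as np


def convert_cn2s_sketch(data_cn, data_terrain, no_data=-9999.0):
    """Convert curve-number values to maximum potential retention S (mm)."""
    data_cn = data_cn.astype(float)
    with np.errstate(divide='ignore'):
        data_s = 25400.0 / data_cn - 254.0      # SCS maximum potential retention
    data_s[~np.isfinite(data_s)] = np.nan       # guard against CN == 0 cells
    data_s[data_terrain <= no_data] = np.nan    # mask cells outside the terrain domain
    data_s[data_s <= 0.0] = np.nan              # avoid zero/negative capacity
    return data_s
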
def organize_analysis_rain_map(self, var_name='rain'):

    logging.info(' ----> Compute rain analysis map [' + str(self.time_step) + '] ... ')

    time_step = self.time_step
    geo_data_region = self.geo_data_region
    geo_data_alert_area = self.geo_data_alert_area
    index_data_alert_area = self.index_data_alert_area
    group_data_alert_area = self.structure_data_group

    geoy_region_1d = geo_data_region['south_north'].values
    geox_region_1d = geo_data_region['west_east'].values
    mask_region_2d = geo_data_region.values
    geox_region_2d, geoy_region_2d = np.meshgrid(geox_region_1d, geoy_region_1d)

    group_analysis = {}
    for (group_data_key, group_data_items), geo_data_dframe in zip(
            group_data_alert_area.items(), geo_data_alert_area.values()):

        logging.info(' -----> Alert Area ' + group_data_key + ' ... ')

        file_path_dest = collect_file_list(
            time_step, self.folder_name_dest_indicators_raw, self.file_name_dest_indicators_raw,
            self.alg_template_tags, alert_area_name=group_data_key)[0]

        # Get index interpolated data between region and alert area domains
        if group_data_key in index_data_alert_area:
            index_data = index_data_alert_area[group_data_key]
        else:
            index_data = None

        if not os.path.exists(file_path_dest):

            # Get subdomain mask, longitudes and latitudes
            geoy_out_1d = geo_data_dframe['south_north'].values
            geox_out_1d = geo_data_dframe['west_east'].values
            mask_out_2d = geo_data_dframe.values
            geox_out_2d, geoy_out_2d = np.meshgrid(geox_out_1d, geoy_out_1d)

            time_delta_max = find_maximum_delta(group_data_items['rain_datasets']['search_period'])
            time_period_type = group_data_items['rain_datasets']['search_type'][0]
            time_period_max, time_frequency_max = split_time_parts(time_delta_max)

            time_range = self.compute_time_range(
                time_step, time_period_max, time_period_type, time_frequency_max)

            file_list = collect_file_list(
                time_range, self.folder_name_ancillary_rain_map_raw, self.file_name_ancillary_rain_map_raw,
                self.alg_template_tags)

            file_analysis = True
            for file_step in file_list:
                if not os.path.exists(file_step):
                    logging.warning(' ===> Filename ' + file_step + ' does not exist')
                    file_analysis = False
                    break

            if file_analysis:

                if file_list[0].endswith('.nc'):
                    file_obj = xr.open_mfdataset(file_list, combine='by_coords')
                elif file_list[0].endswith('.tiff'):

                    if len(file_list) > 1:
                        data_out_3d = np.zeros(
                            shape=[geox_out_2d.shape[0], geoy_out_2d.shape[1], len(file_list)])
                        data_out_3d[:, :, :] = np.nan
                        data_time = []
                        for file_id, (file_step, timestamp_step) in enumerate(zip(file_list, time_range)):
                            data_out_2d, proj, geotrans = read_file_tiff(file_step)

                            # Grid datasets over subdomain mask
                            values_out_interp = interp_grid2map(
                                geox_region_2d, geoy_region_2d, data_out_2d.values,
                                geox_out_2d, geoy_out_2d, index_out=index_data)
                            values_out_interp[mask_out_2d == 0] = np.nan

                            data_out_3d[:, :, file_id] = values_out_interp
                            data_time.append(timestamp_step)

                        file_obj = create_dset(
                            data_out_3d, mask_out_2d, geox_out_2d, geoy_out_2d,
                            var_data_time=data_time, var_data_name=var_name,
                            var_geo_name='mask', var_data_attrs=None, var_geo_attrs=None,
                            coord_name_x='longitude', coord_name_y='latitude', coord_name_time='time',
                            dim_name_x='west_east', dim_name_y='south_north', dim_name_time='time',
                            dims_order_2d=None, dims_order_3d=None)
                    else:
                        logging.error(' ===> Length of file list is not allowed')
                        raise NotImplementedError('Case is not implemented yet')
                else:
                    logging.error(' ===> Filename format is not allowed')
                    raise NotImplementedError('Format is not implemented yet')

                values_mean = file_obj[var_name].mean(dim=['south_north', 'west_east']).values
                analysis_df = pd.DataFrame(
                    index=time_range, data=values_mean,
                    columns=[self.template_struct_ts]).fillna(value=pd.NA)

                analysis_obj = {}
                for time_interval_value in group_data_items['rain_datasets']['search_period']:

                    logging.info(' ------> Compute sum and avg values for ' +
                                 time_interval_value + ' ... ')

                    time_period, time_frequency = split_time_parts(time_interval_value)

                    # Accumulated rain over the search interval (right-labelled windows;
                    # the last, possibly incomplete, window is dropped)
                    tag_rain_accumulated = self.template_rain_point_accumulated.format(time_interval_value)
                    resample_df_sum = analysis_df[self.template_struct_ts].resample(
                        time_interval_value, label='right').sum()[:-1]
                    analysis_df[tag_rain_accumulated] = resample_df_sum

                    # Average rain over the search interval
                    tag_rain_avg = self.template_rain_point_avg.format(time_interval_value)
                    resample_df_avg = analysis_df[self.template_struct_ts].resample(
                        time_interval_value, label='right').mean()[:-1]
                    analysis_df[tag_rain_avg] = resample_df_avg

                    analysis_obj[tag_rain_accumulated] = float(resample_df_sum.max())
                    analysis_obj[tag_rain_avg] = float(resample_df_avg.max())

                    logging.info(' ------> Compute sum and avg values for ' +
                                 time_interval_value + ' ... DONE')

                analysis_collections = {
                    self.template_struct_ts: analysis_df,
                    self.template_struct_obj: analysis_obj}

                logging.info(' -----> Alert Area ' + group_data_key + ' ... DONE')
            else:
                analysis_collections = None
                logging.warning(' ===> Rain data are not available')
                logging.info(' -----> Alert Area ' + group_data_key +
                             ' ... SKIPPED. Datasets are not available.')

            group_analysis[group_data_key] = analysis_collections
        else:
            logging.info(' -----> Alert Area ' + group_data_key +
                         ' ... SKIPPED. Analysis file created previously')

    logging.info(' ----> Compute rain analysis map [' + str(self.time_step) + '] ... DONE')

    return group_analysis
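
# Toy illustration of the resample pattern used in organize_analysis_rain_map:
# accumulate and average a rain series over a search interval (here '3H'), labelling
# each window on its right edge and dropping the last, possibly incomplete, window.
# All names and values are illustrative.
import numpy as np
import pandas as pd


def _rain_resample_example():
    """Return the max accumulated and average rain over 3-hour windows."""
    rain_df = pd.DataFrame(
        index=pd.date_range('2021-01-01 00:00', periods=12, freq='H'),
        data=np.random.rand(12), columns=['rain'])
    rain_sum_3h = rain_df['rain'].resample('3H', label='right').sum()[:-1]
    rain_avg_3h = rain_df['rain'].resample('3H', label='right').mean()[:-1]
    return float(rain_sum_3h.max()), float(rain_avg_3h.max())
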
def organize_dynamic_data(self):

    time_str = self.time_str
    time_period = self.time_period

    geo_da_dst = self.geo_da_dst

    src_dict = self.src_dict

    var_name_obj = self.var_name_obj
    file_path_obj_src = self.file_path_obj_src
    file_path_obj_anc = self.file_path_obj_anc

    flag_cleaning_ancillary = self.flag_cleaning_dynamic_ancillary

    log_stream.info(' ---> Organize dynamic datasets [' + time_str + '] ... ')

    # Check if ancillary file already exists
    file_check_list = []
    for file_path_tmp in file_path_obj_anc:
        if os.path.exists(file_path_tmp):
            if flag_cleaning_ancillary:
                os.remove(file_path_tmp)
                file_check_list.append(False)
            else:
                file_check_list.append(True)
        else:
            file_check_list.append(False)
    file_check = all(file_check_list)

    # If statement on ancillary availability
    if not file_check:

        dset_collection = {}
        for var_name in var_name_obj:

            log_stream.info(' ----> Variable "' + var_name + '" ... ')

            var_compute, var_tag, var_scale_factor, var_shift, file_compression, \
                file_geo_reference, file_type, file_coords, file_freq, compute_quality, \
                var_decimal_digits = self.extract_var_fields(src_dict[var_name])
            var_file_path_src = file_path_obj_src[var_name]

            if var_compute:

                var_geo_data = None
                for var_time, var_file_path_in in zip(time_period, var_file_path_src):

                    log_stream.info(' -----> Time "' +
                                    var_time.strftime(time_format_algorithm) + '" ... ')

                    if os.path.exists(var_file_path_in):

                        # Copy the source file to a domain-tagged temporary file
                        var_file_path, var_file_name = os.path.split(var_file_path_in)
                        var_file_name_tmp = self.domain + '_' + var_file_name
                        copyfile(var_file_path_in, os.path.join(var_file_path, var_file_name_tmp))
                        var_file_path_in = os.path.join(var_file_path, var_file_name_tmp)

                        if file_compression:
                            var_file_path_out = self.define_file_name_unzip(var_file_path_in)
                            unzip_filename(var_file_path_in, var_file_path_out)
                        else:
                            var_file_path_out = deepcopy(var_file_path_in)

                        if file_type == 'binary':

                            if var_geo_data is None:
                                log_stream.info(
                                    ' ------> Select geo reference for binary datasets ... ')
                                var_geo_name = search_geo_reference(
                                    var_file_path_out, self.static_data_src,
                                    tag_geo_reference=file_geo_reference)
                                log_stream.info(' -------> Geo reference name: ' + var_geo_name)
                                var_geo_data, var_geo_x, var_geo_y, var_geo_attrs = \
                                    self.set_geo_attributes(self.static_data_src[var_geo_name])
                                log_stream.info(
                                    ' ------> Select geo reference for binary datasets ... DONE')

                            var_da_src = read_data_binary(
                                var_file_path_out, var_geo_x, var_geo_y, var_geo_attrs,
                                var_scale_factor=var_scale_factor, var_time=var_time, var_name=var_name,
                                coord_name_geo_x=self.coord_name_geo_x,
                                coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x,
                                dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d)

                        elif file_type == 'netcdf':

                            if var_geo_data is None:
                                log_stream.info(
                                    ' ------> Select geo reference for netcdf datasets ... ')
                                var_geo_data, var_geo_x, var_geo_y, var_geo_attrs = \
                                    self.set_geo_attributes(self.static_data_src[file_geo_reference])
                                log_stream.info(
                                    ' ------> Select geo reference for netcdf datasets ... DONE')

                            var_da_src = read_data_nc(
                                var_file_path_out, var_geo_x, var_geo_y, var_geo_attrs,
                                var_coords=file_coords,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x,
                                coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x,
                                dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d)

                        elif file_type == 'tiff' or file_type == 'asc':

                            var_da_src = read_data_tiff(
                                var_file_path_out,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x,
                                coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x,
                                dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d,
                                decimal_round_data=2, decimal_round_geo=7)

                        elif file_type == 'mat':

                            var_da_src = read_data_mat(
                                var_file_path_out,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x,
                                coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x,
                                dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d,
                                decimal_round_data=2, decimal_round_geo=7,
                                src_dict=src_dict[var_name])

                        else:
                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) + '" ... FAILED')
                            log_stream.error(' ===> File type "' + file_type + '" is not allowed.')
                            raise NotImplementedError('Case not implemented yet')

                        # Delete (if needed) the uncompressed file(s)
                        if var_file_path_in != var_file_path_out:
                            if os.path.exists(var_file_path_out):
                                os.remove(var_file_path_out)

                        # Delete the temporary copy
                        os.remove(var_file_path_in)

                        # Apply shift and scale factor (if defined) to values
                        if var_shift is not None:
                            var_da_src.values = var_da_src.values + var_shift
                        if var_scale_factor is not None:
                            var_da_src.values = var_da_src.values / var_scale_factor

                        # Organize destination dataset
                        if var_da_src is not None:

                            # Activate (if needed) the interpolation method for the source data-array
                            active_interp = active_var_interp(var_da_src.attrs, geo_da_dst.attrs)

                            # Apply the interpolation method to the variable source data-array
                            if active_interp:
                                var_da_dst = apply_var_interp(
                                    var_da_src, geo_da_dst,
                                    var_name=var_name,
                                    dim_name_geo_x=self.dim_name_geo_x,
                                    dim_name_geo_y=self.dim_name_geo_y,
                                    coord_name_geo_x=self.coord_name_geo_x,
                                    coord_name_geo_y=self.coord_name_geo_y,
                                    interp_method=self.interp_method)
                            else:
                                var_da_dst = deepcopy(var_da_src)
                                if var_tag != var_name:
                                    var_da_dst.name = var_name

                            # Mask the variable destination data-array
                            var_nodata = None
                            if 'nodata_value' in list(var_da_dst.attrs.keys()):
                                var_nodata = var_da_dst.attrs['nodata_value']
                            geo_nodata = None
                            if 'nodata_value' in list(geo_da_dst.attrs.keys()):
                                geo_nodata = geo_da_dst.attrs['nodata_value']

                            if (geo_nodata is not None) and (var_nodata is not None):
                                var_da_masked = var_da_dst.where(
                                    (geo_da_dst.values[:, :, np.newaxis] != geo_nodata) &
                                    (var_da_dst != var_nodata))
                            else:
                                var_da_masked = deepcopy(var_da_dst)

                            # Sanity check to replace NaNs with the variable no-data value
                            if var_nodata is not None:
                                var_da_masked.values = np.where(
                                    np.isnan(var_da_masked.values), var_nodata, var_da_masked.values)

                            # Round values to the configured number of decimal digits
                            var_da_masked.values = np.round(var_da_masked.values, var_decimal_digits)

                            # Organize data in a common dataset
                            var_dset_masked = create_dset(
                                var_data_time=var_time,
                                var_data_name=var_name, var_data_values=var_da_masked,
                                var_data_attrs=None,
                                var_geo_1d=False,
                                file_attributes=geo_da_dst.attrs,
                                var_geo_name='terrain', var_geo_values=geo_da_dst.values,
                                var_geo_x=geo_da_dst['longitude'].values,
                                var_geo_y=geo_da_dst['latitude'].values,
                                var_geo_attrs=None)

                            # Organize data in merged datasets
                            if var_time not in dset_collection:
                                dset_collection[var_time] = var_dset_masked
                            else:
                                var_dset_tmp = deepcopy(dset_collection[var_time])
                                var_dset_tmp = var_dset_tmp.merge(var_dset_masked, join='right')
                                dset_collection[var_time] = var_dset_tmp

                            # Compute SQA if needed
                            if compute_quality:
                                log_stream.info(' ----> Variable "' + var_name +
                                                '" ... computing quality ')
                                SQA = compute_SQA(var_da_masked.values, geo_da_dst.values,
                                                  self.SQA_ground_and_snow)
                                SQA_dset = create_dset(
                                    var_data_time=var_time,
                                    var_data_name='SQA', var_data_values=SQA, var_data_attrs=None,
                                    var_geo_1d=False,
                                    file_attributes=geo_da_dst.attrs,
                                    var_geo_name='terrain', var_geo_values=geo_da_dst.values,
                                    var_geo_x=geo_da_dst['longitude'].values,
                                    var_geo_y=geo_da_dst['latitude'].values,
                                    var_geo_attrs=None)
                                var_dset_tmp = deepcopy(dset_collection[var_time])
                                var_dset_tmp = var_dset_tmp.merge(SQA_dset, join='right')
                                dset_collection[var_time] = var_dset_tmp

                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) + '" ... DONE')
                        else:
                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) +
                                            '" ... Dataset is not defined')
                    else:
                        var_da_src = None
                        log_stream.info(' -----> Time "' +
                                        var_time.strftime(time_format_algorithm) +
                                        '" ... Dataset is not defined')

                log_stream.info(' ----> Variable "' + var_name + '" ... DONE')
            else:
                log_stream.info(' ----> Variable "' + var_name +
                                '" ... SKIPPED. Compute flag not activated.')

        # Save ancillary datasets
        for file_path_anc, (dset_time, dset_anc) in zip(file_path_obj_anc, dset_collection.items()):
            folder_name_anc, file_name_anc = os.path.split(file_path_anc)
            if not os.path.exists(folder_name_anc):
                make_folder(folder_name_anc)
            write_obj(file_path_anc, dset_anc)

        log_stream.info(' ---> Organize dynamic datasets [' + time_str + '] ... DONE')
    else:
        log_stream.info(' ---> Organize dynamic datasets [' + time_str +
                        '] ... SKIPPED. All datasets are previously computed')
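
# Minimal sketch of the masking step in organize_dynamic_data, assuming a 2D geo
# grid and a 3D (south_north, west_east, time) variable: cells are kept only where
# both the static grid and the variable differ from their no-data values, and the
# NaNs introduced by where() are written back as the variable no-data value. The
# function name and defaults are illustrative.
import numpy as np


def mask_var_against_geo(var_da, geo_da, var_nodata=-9999.0, geo_nodata=-9999.0):
    """Mask a 3D variable xarray.DataArray against a 2D geo xarray.DataArray."""
    var_da_masked = var_da.where(
        (geo_da.values[:, :, np.newaxis] != geo_nodata) & (var_da != var_nodata))
    # replace NaNs introduced by where() with the variable no-data value
    var_da_masked.values = np.where(
        np.isnan(var_da_masked.values), var_nodata, var_da_masked.values)
    return var_da_masked
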
def organize_analysis_sm(self, var_name='soil_moisture'):

    logging.info(' ----> Compute soil moisture analysis [' + str(self.time_step) + '] ... ')

    time_step = self.time_step
    geo_data_alert_area = self.geo_data_alert_area
    group_data_alert_area = self.structure_data_group

    group_analysis = {}
    for (group_data_key, group_data_items), geo_data_dframe in zip(
            group_data_alert_area.items(), geo_data_alert_area.values()):

        logging.info(' -----> Alert Area ' + group_data_key + ' ... ')

        geoy_out_1d = geo_data_dframe['south_north'].values
        geox_out_1d = geo_data_dframe['west_east'].values
        mask_2d = geo_data_dframe.values
        geox_out_2d, geoy_out_2d = np.meshgrid(geox_out_1d, geoy_out_1d)

        time_delta_max = find_maximum_delta(group_data_items['sm_datasets']['search_period'])
        time_period_type = group_data_items['sm_datasets']['search_type'][0]
        time_period_max, time_frequency_max = split_time_parts(time_delta_max)

        time_range = self.compute_time_range(
            time_step, time_period_max, time_period_type, time_frequency_max)

        file_list = collect_file_list(
            time_range, self.folder_name_ancillary_sm_raw, self.file_name_ancillary_sm_raw,
            self.alg_template_tags, alert_area_name=group_data_key)

        file_path_dest = collect_file_list(
            time_step, self.folder_name_dest_indicators_raw, self.file_name_dest_indicators_raw,
            self.alg_template_tags, alert_area_name=group_data_key)[0]

        if not os.path.exists(file_path_dest):

            file_list_check = []
            time_range_check = []
            for file_step, timestamp_step in zip(file_list, time_range):
                if os.path.exists(file_step):
                    file_list_check.append(file_step)
                    time_range_check.append(timestamp_step)

            file_analysis = len(file_list_check) >= 1

            if file_analysis:

                analysis_collections = {}

                if file_list_check[0].endswith('.nc'):
                    file_data_raw = xr.open_mfdataset(file_list_check, combine='by_coords')
                elif file_list_check[0].endswith('.tiff'):

                    if len(file_list_check) == 1:
                        data_2d, proj, geotrans = read_file_tiff(file_list_check[0])
                        file_data_raw = create_dset(
                            data_2d, mask_2d, geox_out_2d, geoy_out_2d,
                            var_data_time=time_step, var_data_name=var_name,
                            var_geo_name='mask', var_data_attrs=None, var_geo_attrs=None,
                            coord_name_x='longitude', coord_name_y='latitude', coord_name_time='time',
                            dim_name_x='west_east', dim_name_y='south_north', dim_name_time='time',
                            dims_order_2d=None, dims_order_3d=None)
                    elif len(file_list_check) > 1:
                        data_3d = np.zeros(
                            shape=[geox_out_2d.shape[0], geoy_out_2d.shape[1], len(file_list_check)])
                        data_3d[:, :, :] = np.nan
                        data_time = []
                        for file_id, (file_step, timestamp_step) in enumerate(
                                zip(file_list_check, time_range_check)):
                            data_2d, proj, geotrans = read_file_tiff(file_step)
                            data_3d[:, :, file_id] = data_2d
                            data_time.append(timestamp_step)

                        file_data_raw = create_dset(
                            data_3d, mask_2d, geox_out_2d, geoy_out_2d,
                            var_data_time=data_time, var_data_name=var_name,
                            var_geo_name='mask', var_data_attrs=None, var_geo_attrs=None,
                            coord_name_x='longitude', coord_name_y='latitude', coord_name_time='time',
                            dim_name_x='west_east', dim_name_y='south_north', dim_name_time='time',
                            dims_order_2d=None, dims_order_3d=None)
                    else:
                        logging.error(' ===> Length of file list is not allowed')
                        raise NotImplementedError('Case is not implemented yet')
                else:
                    logging.error(' ===> Filename format is not allowed')
                    raise NotImplementedError('Format is not implemented yet')

                file_data_mean = file_data_raw[var_name].mean(dim=['south_north', 'west_east'])
                file_time = list(file_data_raw.time.values)
                file_values_mean = file_data_mean.values

                file_ts = pd.DatetimeIndex(file_time)
                if file_ts.shape[0] == 1:
                    file_ts = [file_ts]
                    file_values_mean = [file_values_mean]

                # Soil moisture average time-series
                analysis_df = pd.DataFrame(
                    index=file_ts, data=file_values_mean,
                    columns=[self.template_struct_ts]).fillna(value=pd.NA)
                analysis_collections[self.template_struct_ts] = analysis_df

                # Soil moisture first value in the selected period
                tag_sm_point_first = self.template_sm_point_first
                file_time_first = pd.Timestamp(file_time[0]).strftime('%Y%m%d_%H%M')
                file_value_first = float(file_values_mean[0])
                tag_sm_point_first = tag_sm_point_first.format(file_time_first)

                # Soil moisture last value in the selected period
                tag_sm_point_last = self.template_sm_point_last
                file_time_last = pd.Timestamp(file_time[-1]).strftime('%Y%m%d_%H%M')
                file_value_last = float(file_values_mean[-1])
                tag_sm_point_last = tag_sm_point_last.format(file_time_last)

                # Soil moisture average in the selected period
                tag_sm_point_avg = self.template_sm_point_avg
                file_value_avg = float(analysis_df[self.template_struct_ts].mean())

                # Soil moisture maximum in the selected period
                tag_sm_point_max = self.template_sm_point_max
                file_value_max = float(analysis_df[self.template_struct_ts].max())

                analysis_collections[self.template_struct_obj] = {
                    tag_sm_point_first: file_value_first,
                    tag_sm_point_last: file_value_last,
                    tag_sm_point_avg: file_value_avg,
                    tag_sm_point_max: file_value_max}

                logging.info(' -----> Alert Area ' + group_data_key + ' ... DONE')
            else:
                analysis_collections = None
                logging.warning(' ===> Soil moisture data are not available')
                logging.info(' -----> Alert Area ' + group_data_key +
                             ' ... SKIPPED. Datasets are not available.')

            group_analysis[group_data_key] = analysis_collections
        else:
            logging.info(' -----> Alert Area ' + group_data_key +
                         ' ... SKIPPED. Analysis file created previously')

    logging.info(' ----> Compute soil moisture analysis [' + str(self.time_step) + '] ... DONE')

    return group_analysis
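
# Toy illustration of the point statistics computed in organize_analysis_sm: first,
# last, mean and max of a domain-averaged soil moisture series, with the first/last
# tags carrying the corresponding timestamps. Names and values are illustrative.
import pandas as pd


def _sm_point_stats_example():
    """Return a dict of first/last/avg/max statistics for a toy SM series."""
    sm_series = pd.Series(
        [0.42, 0.40, 0.45, 0.47],
        index=pd.date_range('2021-01-01', periods=4, freq='D'))
    return {
        'sm_first_{}'.format(sm_series.index[0].strftime('%Y%m%d_%H%M')): float(sm_series.iloc[0]),
        'sm_last_{}'.format(sm_series.index[-1].strftime('%Y%m%d_%H%M')): float(sm_series.iloc[-1]),
        'sm_avg': float(sm_series.mean()),
        'sm_max': float(sm_series.max()),
    }
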
def organize_forcing(self, var_name='rain', var_min=0, var_max=None):

    logging.info(' ----> Organize rain forcing ... ')

    geoy_out_1d = self.file_data_geo['south_north'].values
    geox_out_1d = self.file_data_geo['west_east'].values
    mask_out_2d = self.file_data_geo.values

    point_weather_section = self.geo_data_weather_station

    geox_out_2d, geoy_out_2d = np.meshgrid(geox_out_1d, geoy_out_1d)

    for datetime_step, file_path_src, file_path_ancillary_map, file_path_ancillary_point in zip(
            self.time_range, self.file_path_src_list,
            self.file_path_ancillary_map_list, self.file_path_ancillary_point_list):

        logging.info(' -----> TimeStep: ' + str(datetime_step) + ' ... ')

        if self.flag_ancillary_updating:
            if os.path.exists(file_path_ancillary_map):
                os.remove(file_path_ancillary_map)
            if os.path.exists(file_path_ancillary_point):
                os.remove(file_path_ancillary_point)

        # Build the ancillary datasets unless both map and point files already exist
        if not (os.path.exists(file_path_ancillary_map) and os.path.exists(file_path_ancillary_point)):

            if os.path.exists(file_path_src):

                file_dframe = read_file_csv(
                    file_path_src, datetime_step, file_header=self.columns_src,
                    file_sep=self.file_columns_sep,
                    scale_factor_longitude=self.file_scale_factor_longitude,
                    scale_factor_latitude=self.file_scale_factor_latitude,
                    scale_factor_data=self.file_scale_factor_rain)

                # Filter data using variable limits (if defined)
                if var_min is not None:
                    file_dframe = file_dframe[(file_dframe['data'] >= var_min)]
                if var_max is not None:
                    file_dframe = file_dframe[(file_dframe['data'] <= var_max)]

                if file_dframe is not None:
                    file_time_src = file_dframe.index.unique()
                else:
                    file_time_src = None
            else:
                file_dframe = None
                file_time_src = None
                logging.warning(' ===> File datasets of rain weather stations is not available.')

            if (file_time_src is not None) and (len(file_time_src) > 1):
                logging.warning(' ===> More than one time step selected. '
                                'Errors could arise in the script')

            if file_dframe is not None:

                logging.info(' ------> Interpolate points to map datasets ... ')
                map_out_2d = interpolate_rain_dataframe(
                    file_dframe, mask_out_2d, geox_out_2d, geoy_out_2d, folder_tmp=self.folder_tmp)
                logging.info(' ------> Interpolate points to map datasets ... DONE')

                logging.info(' ------> Save map datasets ... ')
                folder_name_map, file_name_map = os.path.split(file_path_ancillary_map)
                make_folder(folder_name_map)

                if file_path_ancillary_map.endswith('.nc'):

                    dset_out = create_dset(
                        map_out_2d, mask_out_2d, geox_out_2d, geoy_out_2d,
                        var_data_time=datetime_step, var_data_name=var_name,
                        var_geo_name='mask', var_data_attrs=None, var_geo_attrs=None,
                        coord_name_x='longitude', coord_name_y='latitude', coord_name_time='time',
                        dim_name_x='west_east', dim_name_y='south_north', dim_name_time='time',
                        dims_order_2d=None, dims_order_3d=None)

                    write_dset(
                        file_path_ancillary_map, dset_out,
                        dset_mode='w', dset_engine='h5netcdf', dset_compression=0,
                        dset_format='NETCDF4', dim_key_time='time', no_data=-9999.0)

                    logging.info(' ------> Save map datasets ... DONE. [NETCDF]')

                elif file_path_ancillary_map.endswith('.tiff'):

                    # NOTE: saving may fail with "ERROR 1: Only OGC WKT Projections supported
                    # for writing to GeoTIFF. EPSG:4326 not supported."
                    save_file_tiff(
                        file_path_ancillary_map, map_out_2d, geox_out_2d, geoy_out_2d,
                        file_metadata=self.file_metadata, file_epsg_code=self.file_epsg_code)

                    logging.info(' ------> Save map datasets ... DONE. [GEOTIFF]')

                else:
                    logging.info(' ------> Save map datasets ... FAILED')
                    logging.error(' ===> Filename format is not allowed')
                    raise NotImplementedError('Format is not implemented yet')

                self.file_path_processed_map.append(file_path_ancillary_map)

                logging.info(' ------> Save points datasets ... ')
                folder_name_point, file_name_point = os.path.split(file_path_ancillary_point)
                make_folder(folder_name_point)

                if file_path_ancillary_point.endswith('.csv'):
                    write_file_csv(file_path_ancillary_point, file_dframe)
                    logging.info(' ------> Save points datasets ... DONE')
                else:
                    logging.info(' ------> Save points datasets ... FAILED')
                    logging.error(' ===> Filename format is not allowed')
                    raise NotImplementedError('Format is not implemented yet')

                self.file_path_processed_point.append(file_path_ancillary_point)

                logging.info(' -----> TimeStep: ' + str(datetime_step) + ' ... DONE')

            else:
                logging.info(' -----> TimeStep: ' + str(datetime_step) + ' ... FAILED')
                logging.warning(' ===> File datasets of rain weather stations is not available.')
        else:
            logging.info(' -----> TimeStep: ' + str(datetime_step) + ' ... PREVIOUSLY DONE')

    logging.info(' ----> Organize rain forcing ... DONE')
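
# Minimal sketch of the point-to-grid step assumed for interpolate_rain_dataframe:
# scatter station values onto the target grid (here with scipy's nearest-neighbour
# griddata) and mask cells outside the domain. The real helper may use a different
# interpolation scheme; the function name and arguments are illustrative.
import numpy as np
from scipy.interpolate import griddata


def interpolate_points_to_grid_sketch(point_x, point_y, point_data,
                                      geox_out_2d, geoy_out_2d, mask_out_2d):
    """Interpolate scattered station values onto a regular 2D grid."""
    map_out_2d = griddata(
        (point_x, point_y), point_data, (geox_out_2d, geoy_out_2d), method='nearest')
    map_out_2d[mask_out_2d == 0] = np.nan   # mask cells outside the domain
    return map_out_2d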