def __init__(self, input_args: dict):
    """Store the parsed CLI arguments and set up output paths, logging and version info.

    :param input_args: dict of command-line arguments (expects keys 'input', 'out',
                       'roi' and 'product'; a 'logfile' key is added here).
    """
    self.arguments = input_args
    self.INPUT_DIR = input_args['input']
    self.OUTPUT_DIR = input_args['out']
    self.ROI = input_args['roi']
    self.product = input_args['product']
    self.CSV_N1 = os.path.join(self.OUTPUT_DIR, 'CSV_N1')
    self.INSTANCE_TIME_TAG = datetime.now().strftime('%Y%m%dT%H%M%S')
    # NOTE: the caller-supplied dict is mutated on purpose so main() can report the log location.
    input_args['logfile'] = os.path.join(input_args['out'], 'sen3r_' + self.INSTANCE_TIME_TAG + '.log')
    self.log = Utils.create_log_handler(input_args['logfile'])
    self.IMG_DIR = os.path.join(self.OUTPUT_DIR, 'images')
    # Section 5 for single source of truth for the version number:
    # https://packaging.python.org/guides/single-sourcing-package-version/#single-sourcing-the-version
    self.VERSION = metadata.version('sen3r')  # TODO: may be outdated depending on the environment's installed version
    # Populated later (see build_intermediary_files / build_raw_csvs).
    self.vertices = None
    self.sorted_file_list = None
def build_raw_csvs(self):
    """
    Parse the input arguments and return a path containing the output intermediary files.

    :return: l1_output_path Posixpath
    """
    self.log.info(f'Searching for WFR files inside: {self.INPUT_DIR}')
    self.log.info('Sorting input files by date.')
    self.sorted_file_list = self.build_list_from_subset(input_directory_path=self.INPUT_DIR)
    self.log.info(f'Input files found: {len(self.sorted_file_list)}')
    self.log.info('------')
    self.log.info(f'Generating ancillary data folder: {self.CSV_N1}')
    Path(self.CSV_N1).mkdir(parents=True, exist_ok=True)
    self.log.info(f'Attempting to extract geometries from: {self.ROI}')
    self.vertices = Utils.roi2vertex(roi=self.ROI, aux_folder_out=self.CSV_N1)

    total = len(self.sorted_file_list)
    start = time.perf_counter()
    generated_csvs = []
    for idx, img in enumerate(self.sorted_file_list):
        percent = int((idx * 100) / total)
        figdate = os.path.basename(img).split('____')[1].split('_')[0]
        self.log.info(f'({percent}%) {idx + 1} of {total} - {figdate}')
        try:
            band_data, img_data = self.get_s3_data(wfr_img_folder=img,
                                                   vertices=self.vertices,
                                                   roi_file=self.ROI)
            csv_name = os.path.basename(img).split('.')[0] + '.csv'
            csv_path = os.path.join(self.CSV_N1, csv_name)
            self.log.info(f'Saving DF at : {csv_path}')
            band_data.to_csv(csv_path, index=False)
            generated_csvs.append(csv_path)
        except FileNotFoundError as e404:
            # Some Band.nc file was missing inside the image: report it and move to the next one.
            self.log.info(f'{e404}')
            self.log.info(f'Skipping: {figdate}')
            continue

    elapsed = round(time.perf_counter() - start, 2)
    self.log.info(f'>>> Finished in {elapsed} second(s). <<<')
    return generated_csvs
def build_single_csv(self, multiFileBridge=False):
    """
    Parse the input arguments and return a path containing the output intermediary file.

    :return: l1_output_path Posixpath
    """
    if not multiFileBridge:
        # TODO: build_single_csv should be called by build_raw_csvs for code recycling.
        self.log.info(f'Searching for WFR file inside: {self.INPUT_DIR}')
        self.log.info(f'Generating ancillary data folder: {self.CSV_N1}')
        Path(self.CSV_N1).mkdir(parents=True, exist_ok=True)
        self.log.info(f'Attempting to extract geometries from: {self.ROI}')
        self.vertices = Utils.roi2vertex(roi=self.ROI, aux_folder_out=self.CSV_N1)

    # TODO: https://xarray-spatial.org/reference/_autosummary/xrspatial.multispectral.true_color.html
    band_data, img_data = self.get_s3_data(wfr_img_folder=self.INPUT_DIR, vertices=self.vertices)

    base_name = os.path.basename(self.INPUT_DIR).split('.')[0]
    csv_path = os.path.join(self.CSV_N1, base_name + '.csv')
    self.log.info(f'Saving DF at : {csv_path}')
    band_data.to_csv(csv_path, index=False)
    return band_data, img_data, [csv_path]
def main():
    """
    Entry point for the SEN3R package. Call sen3r -h or --help to see further options.
    """
    # ,--------------,
    # | Start timers |
    # '--------------'
    Utils.tic()
    t1 = time.perf_counter()

    parser = argparse.ArgumentParser(
        description='SEN3R (Sentinel-3 Reflectance Retrieval over Rivers) '
                    'enables extraction of reflectance time series from Sentinel-3 L2 WFR images over water bodies.')
    parser.add_argument("-i", "--input", help="The products input folder. Required.", type=str)
    parser.add_argument("-o", "--out", help="Output directory. Required.", type=str)
    parser.add_argument("-r", "--roi", help="Region of interest (SHP, KML or GeoJSON). Required", type=str)
    parser.add_argument("-p", "--product", help='Currently only WFR is available.', default='WFR', type=str)
    parser.add_argument("-c", "--cams", help="Path to search for auxiliary CAMS file. Optional.", type=str)
    parser.add_argument("-k", "--cluster", help="Which method to use for clustering. Optional.",
                        default='M4', type=str)
    parser.add_argument("-s", "--single",
                        help="Single mode: run SEN3R over only one image instead of a whole directory. Optional.",
                        action='store_true')
    parser.add_argument('-v', '--version', help='Displays current package version.', action='store_true')

    # ,--------------------------------------,
    # | STORE INPUT VARS INSIDE SEN3R OBJECT |
    # '--------------------------------------'
    args = vars(parser.parse_args())  # Converts the input arguments from Namespace() to dict

    if args['version']:
        print(f'SEN3R version: {sen3r.__version__}')
    elif args['input'] is None or args['out'] is None or args['roi'] is None:
        print('Please specify required INPUT/OUTPUT folders and REGION of interest (-i, -o, -r)')
    else:
        # Core's __init__ also performs the log setup and records the logfile path in args.
        s3r = Core(args)  # Declare a SEN3R Core Object
        print(f'Starting SEN3R - LOG operations saved at:{s3r.arguments["logfile"]}')
        s3r.log.info(f'Starting SEN3R {s3r.VERSION} ({sen3r.__version__})')
        s3r.log.info('------')
        s3r.log.info('Input arguments:')
        for key, val in args.items():
            s3r.log.info(f'{key}: {val}')
        s3r.log.info('------')

        if args['single']:
            # Single mode
            band_data, img_data, doneList = s3r.build_single_csv()
        else:
            # Default mode: several images
            doneList = s3r.build_raw_csvs()

        print('cams_args:', s3r.arguments['cams'])
        if s3r.arguments["cams"]:
            s3r.process_csv_list(raw_csv_list=doneList, irmax=0.01, use_cams=True,
                                 k_method=s3r.arguments['cluster'])
        else:
            s3r.process_csv_list(raw_csv_list=doneList, k_method=s3r.arguments['cluster'])

    # ,------------------------------,
    # | End timers and report to log |
    # '------------------------------'
    t_hour, t_min, t_sec = Utils.tac()
    t2 = time.perf_counter()
    print(f'Finished in {round(t2 - t1, 2)} second(s).')
    print(f'Elapsed execution time: {t_hour}h : {t_min}m : {t_sec}s')
def raw_report(self, full_csv_path, img_id_date, raw_df, filtered_df, output_rprt_path=None):
    """
    This function will ingest RAW CSVs from S3-FRBR > outsourcing.py > GPTBridge.get_pixels_by_kml(),
    convert them into Pandas DataFrames, filter them and generate a PDF report.

    :param full_csv_path: path of the RAW CSV; also used as the auxiliary-figure folder
                          when no explicit output folder is given.
    :param img_id_date: image date tag used in plot titles and output file names.
    :param raw_df: unfiltered DataFrame.
    :param filtered_df: filtered DataFrame.
    :param output_rprt_path: optional output folder; when None, the auxiliary figures go
                             next to the CSV and no PDF is written to disk.
    :return: PIL Image with the five report figures stacked in one column.
    """
    figdate = img_id_date
    df = raw_df
    fdf = filtered_df
    RAW_CSV = full_csv_path
    if output_rprt_path:
        aux_figs_path = os.path.join(output_rprt_path, 'aux_' + figdate)
    else:
        aux_figs_path = os.path.join(RAW_CSV, 'aux_' + figdate)
    # exist_ok so re-running the report for the same image does not crash on the existing folder.
    os.makedirs(aux_figs_path, exist_ok=True)
    # Generating the saving path of the individual report images so we can fetch it later.
    svpt1 = os.path.join(aux_figs_path, 'a.png')
    svpt2 = os.path.join(aux_figs_path, 'b.png')
    svpt3 = os.path.join(aux_figs_path, 'c.png')
    svpt4 = os.path.join(aux_figs_path, 'd.png')
    svpt5 = os.path.join(aux_figs_path, 'e.png')
    # BUGFIX: build the PDF path only when an output folder was given —
    # os.path.join(None, ...) raised TypeError whenever output_rprt_path was None.
    svpt_report = os.path.join(output_rprt_path, 'report_' + figdate + '.pdf') if output_rprt_path else None

    # IMG A - Scatter MAP
    plt.rcParams['figure.figsize'] = self.rcparam
    fig = plt.figure()
    ax = plt.axes()
    ax.set_title(figdate, fontsize=16)
    sktmap = ax.scatter(df['longitude:double'], df['latitude:double'],
                        c=df['T865:float'], cmap='viridis', s=3)
    cbar = fig.colorbar(sktmap, ax=ax)
    cbar.set_label('Aer. Optical Thickness (T865)')
    # NOTE(review): map bounds are hard-coded to one specific ROI — TODO: derive from the data extent.
    ax.set_xlim(-61.34, -60.46)
    ax.set_ylim(-3.65, -3.25)
    ax.set_xlabel('LON')
    ax.set_ylabel('LAT')
    plt.savefig(svpt1, dpi=self.imgdpi, bbox_inches='tight')

    # IMG B - RAW Scatter
    self.plot_sidebyside_sktr(x1_data=df['Oa08_reflectance:float'],
                              y1_data=df['Oa17_reflectance:float'],
                              x2_data=df['Oa08_reflectance:float'],
                              y2_data=df['Oa17_reflectance:float'],
                              x_lbl='RED: Oa08 (665nm)',
                              y_lbl='NIR: Oa17 (865nm)',
                              c1_data=df['A865:float'],
                              c1_lbl='Aer. Angstrom Expoent (A865)',
                              c2_data=df['T865:float'],
                              c2_lbl='Aer. Optical Thickness (T865)',
                              savepathname=svpt2)

    # IMG C - Filtered Scatter
    self.plot_sidebyside_sktr(x1_data=fdf['Oa08_reflectance:float'],
                              y1_data=fdf['Oa17_reflectance:float'],
                              x2_data=fdf['Oa08_reflectance:float'],
                              y2_data=fdf['Oa17_reflectance:float'],
                              x_lbl='RED: Oa08 (665nm)',
                              y_lbl='NIR: Oa17 (865nm)',
                              c1_data=fdf['A865:float'],
                              c1_lbl='Aer. Angstrom Expoent (A865)',
                              c2_data=fdf['T865:float'],
                              c2_lbl='Aer. Optical Thickness (T865)',
                              savepathname=svpt3)

    # IMG C - KD Histogram
    x = fdf['Oa08_reflectance:float'].copy()
    pk, xray, yray, kde_res = self.kde_local_maxima(x)
    self.plot_kde_histntable(xray=xray, yray=yray, x=x, kde_res=kde_res, pk=pk,
                             svpath_n_title=svpt4)

    # IMG D - Reflectance
    self.s3l2_custom_reflectance_plot(df=fdf, save_title=svpt5)

    # Report: stack the five figures into a single-column grid.
    images = [Image.open(p) for p in [svpt1, svpt2, svpt3, svpt4, svpt5]]
    report = Utils.pil_grid(images, 1)
    if output_rprt_path:
        report.save(svpt_report, resolution=100.0)
    plt.close('all')
    return report
def process_csv_list(self, raw_csv_list, irmax=0.2, use_cams=False, do_clustering=True, k_method='M4'):
    """
    Post-process the raw per-image CSVs: filter, (optionally) cluster, plot, and export
    a consolidated time-series Excel file with color-coded quality flags.

    :param raw_csv_list: [List] containing the absolute path to files extracted by self.get_s3_data
    :param irmax: maximum IR reflectance threshold passed to tsgen.update_csvs.
    :param use_cams: when True, read the CAMS CSV from the CLI args and match its AOD865 by date.
    :param do_clustering: when True, run DBSCAN and keep only the cluster whose median
                          Oa21 reflectance is nearest to zero.
    :param k_method: key of dd.clustering_methods selecting the DBSCAN feature set.
    :return: None
    """
    # irmax = 0.001  # Negro
    # irmax = 0.08   # Fonte Boa
    # irmin = 0.001  # Manacapuru
    tsgen = TsGenerator(parent_log=self.log)

    # GET SERIES SAVE PATH  # TODO: refactor
    excel_save_path = os.path.join(self.OUTPUT_DIR, 'sen3r.xlsx')
    out_dir = os.path.join(self.OUTPUT_DIR, 'CSV_N2')
    img_dir = os.path.join(self.OUTPUT_DIR, 'IMG')

    # CREATE THE DIRECTORIES IF THEY DON'T EXIST YET
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    Path(img_dir).mkdir(parents=True, exist_ok=True)

    # Start timer
    t1 = time.perf_counter()
    max_aot = False
    total = len(raw_csv_list)

    if use_cams:
        # READ CAMS
        df_cams = pd.read_csv(self.arguments['cams'])
        df_cams['pydate'] = pd.to_datetime(df_cams['Datetime'])

    for n, img in enumerate(raw_csv_list):
        print(f'>>> Processing: {n + 1} of {total} ... {img}')
        self.log.info(f'>>> Processing: {n + 1} of {total} ... {img}')
        figdate = os.path.basename(img).split('____')[1].split('_')[0]
        savpt_raw_sctr = os.path.join(img_dir, figdate + '_0.png')
        savpt_sctr = os.path.join(img_dir, figdate + '_1.png')
        savpt_rrs = os.path.join(img_dir, figdate + '_2.png')
        savpt_k = os.path.join(img_dir, figdate + '_3.png')

        if use_cams:
            # Find the equivalent observation day in CAMS (normalized to 12:00 of the same date).
            dtlbl = datetime.strptime(figdate, '%Y%m%dT%H%M%S')
            dtlbl = dtlbl.replace(hour=12, minute=0, second=0, microsecond=0)
            cams_row = df_cams[df_cams['pydate'] == dtlbl]
            # BUGFIX: guard the lookup — .values[0] on an empty selection raised IndexError
            # before the "no match" check could ever run.
            cams_val = cams_row['AOD865'].values[0] if len(cams_row) > 0 else False
            # if cams_val is empty/falsy, no usable match was found
            if not cams_val:
                cams_val = False
        else:
            cams_val = False

        # read and plot the raw CSV
        rawDf = pd.read_csv(img, sep=',')
        tsgen.plot_sidebyside_sktr(x1_data=rawDf['Oa08_reflectance:float'],
                                   y1_data=rawDf['Oa17_reflectance:float'],
                                   x2_data=rawDf['Oa08_reflectance:float'],
                                   y2_data=rawDf['Oa17_reflectance:float'],
                                   x_lbl='RED: Oa08 (665nm)',
                                   y_lbl='NIR: Oa17 (865nm)',
                                   c1_data=rawDf['A865:float'],
                                   c1_lbl='Aer. Angstrom Expoent (A865)',
                                   c2_data=rawDf['T865:float'],
                                   c2_lbl='Aer. Optical Thickness (T865)',
                                   title=f'RAW {os.path.basename(out_dir)} WFR {figdate} '
                                         f'RED:Oa08(665nm) x NIR:Oa17(865nm)',
                                   savepathname=savpt_raw_sctr)

        # reprocessing the raw CSVs and removing reflectances above the threshold in IR.
        try:
            dfpth, df = tsgen.update_csvs(csv_path=img,
                                          glint=20.0,
                                          ir_max_threshold=irmax,
                                          savepath=out_dir,
                                          max_aot=max_aot,
                                          cams_val=cams_val)
        except Exception as e:
            print("type error: " + str(e))
            continue

        if len(df) < 1:
            print(f'Skipping empty CSV: {dfpth}')
            continue

        # ,--------------------,
        # | DBSCAN Clustering |
        # '--------------------'
        if do_clustering:
            # Backup the DF before cleaning it with DBSCAN
            bkpdf = df.copy()
            # Apply DBSCAN
            tsgen.db_scan(df, dd.clustering_methods[k_method])
            # Plot and save the identified clusters
            tsgen.plot_scattercluster(df,
                                      col_x='Oa17_reflectance:float',
                                      col_y='Oa08_reflectance:float',
                                      col_color='T865:float',
                                      title=f'DBSCAN {figdate}',
                                      savepath=savpt_k)
            # Delete rows classified as noise:
            indexNames = df[df['cluster'] == -1].index
            df.drop(indexNames, inplace=True)
            if len(df) > 1:
                # Keep only the cluster whose median Oa21 reflectance is nearest to zero.
                clusters = df.groupby(by='cluster').median()
                k = Utils.find_nearest(clusters['Oa21_reflectance:float'], 0)
                indexNames = df[df['cluster'] != k].index
                df.drop(indexNames, inplace=True)
                # TODO: test cluster with the smallest T865 value as a primary/secondary rule.
            else:
                # Everything was flagged as noise: fall back to the pre-DBSCAN data.
                df = bkpdf.copy()

        tsgen.plot_sidebyside_sktr(x1_data=df['Oa08_reflectance:float'],
                                   y1_data=df['Oa17_reflectance:float'],
                                   x2_data=df['Oa08_reflectance:float'],
                                   y2_data=df['Oa17_reflectance:float'],
                                   x_lbl='RED: Oa08 (665nm)',
                                   y_lbl='NIR: Oa17 (865nm)',
                                   c1_data=df['A865:float'],
                                   c1_lbl='Aer. Angstrom Expoent (A865)',
                                   c2_data=df['T865:float'],
                                   c2_lbl='Aer. Optical Thickness (T865)',
                                   title=f'{os.path.basename(out_dir)} WFR {figdate} '
                                         f'RED:Oa08(665nm) x NIR:Oa17(865nm)',
                                   savepathname=savpt_sctr)

        tsgen.s3l2_custom_reflectance_plot(df=df,
                                           figure_title=f'{figdate}\n',
                                           c_lbl='Aer. Optical Thickness (T865)',
                                           save_title=savpt_rrs)

    print(f'Generating EXCEL output at: {excel_save_path}')
    self.log.info(f'Generating EXCEL output at: {excel_save_path}')

    # Generating excel file from the post-processed data
    wdir = out_dir
    todo = tsgen.build_list_from_subset(wdir)
    # Converting and saving the list of mean values into a XLS excel file.
    data = tsgen.generate_tms_data(wdir, todo)
    series_df = pd.DataFrame(data=data)

    # create empty excel, then reopen it so pandas can append the 'wfr' sheet to it
    wb = openpyxl.Workbook()
    wb.save(excel_save_path)
    book = openpyxl.load_workbook(excel_save_path)
    writer = pd.ExcelWriter(excel_save_path, engine='openpyxl')
    # NOTE(review): assigning writer.book is deprecated in recent pandas — confirm the pinned version.
    writer.book = book
    # Saving to Excel .xlsx
    series_df.to_excel(writer, sheet_name='wfr', index=False)
    writer.save()
    writer.close()

    # Custom painting of the cells
    # https://openpyxl.readthedocs.io/en/stable/_modules/openpyxl/styles/colors.html
    wb = openpyxl.load_workbook(excel_save_path)
    # Delete the empty default sheet
    del wb['Sheet']
    # Get the sheet containing the final output
    ws = wb['wfr']
    mod3r_colors = {0: '00FFFFFF', 1: '00008000', 2: '00FE6000', 3: '00FF0000'}
    for row in ws.iter_rows(min_row=2, min_col=None, max_col=None):
        # get the quality flag for the given row (column 43 — TODO confirm against generate_tms_data)
        flag_qlt = row[42]
        for cell in row:
            color_code = mod3r_colors[flag_qlt.value]
            cell.fill = PatternFill(start_color=color_code, end_color=color_code, fill_type="solid")
    wb.save(excel_save_path)

    t2 = time.perf_counter()
    outputstr = f'>>> Finished in {round(t2 - t1, 2)} second(s). <<<'
    print(outputstr)
    self.log.info(outputstr)
else: # Default mode: several images doneList = s3r.build_raw_csvs() print('cams_args:', s3r.arguments['cams']) if s3r.arguments["cams"]: s3r.process_csv_list(raw_csv_list=doneList, irmax=0.01, use_cams=True, k_method=s3r.arguments['cluster']) else: s3r.process_csv_list(raw_csv_list=doneList, k_method=s3r.arguments['cluster']) pass if __name__ == '__main__': # ,--------------, # | Start timers |-------------------------------------------------------------------------------------------------- # '--------------' Utils.tic() t1 = time.perf_counter() # ,-----, # | RUN |----------------------------------------------------------------------------------------------------------- # '-----' main() # ,------------------------------, # | End timers and report to log |---------------------------------------------------------------------------------- # '------------------------------' t_hour, t_min, t_sec = Utils.tac() t2 = time.perf_counter() outputstr = f'Finished in {round(t2 - t1, 2)} second(s).' final_message = f'Elapsed execution time: {t_hour}h : {t_min}m : {t_sec}s' print(outputstr) print(final_message)