Beispiel #1
0
 def __init__(self, input_args: dict):
     """Store the CLI argument dict and set up output paths, logging and version info.

     :param input_args: dict of parsed CLI arguments; must contain the keys
         'input', 'out', 'roi' and 'product'.
     """
     self.arguments = input_args
     self.INPUT_DIR = input_args['input']
     self.OUTPUT_DIR = input_args['out']
     self.ROI = input_args['roi']
     self.product = input_args['product']
     self.CSV_N1 = os.path.join(self.OUTPUT_DIR, 'CSV_N1')
     self.INSTANCE_TIME_TAG = datetime.now().strftime('%Y%m%dT%H%M%S')
     logfile_name = 'sen3r_' + self.INSTANCE_TIME_TAG + '.log'
     self.arguments['logfile'] = os.path.join(self.arguments['out'], logfile_name)
     self.log = Utils.create_log_handler(self.arguments['logfile'])
     self.IMG_DIR = os.path.join(self.OUTPUT_DIR, 'images')
     # Section 5 for single source of truth for the version number:
     # https://packaging.python.org/guides/single-sourcing-package-version/#single-sourcing-the-version
     # TODO: May be outdated depending on the environment installed version
     self.VERSION = metadata.version('sen3r')
     # Both are populated later, inside build_intermediary_files.
     self.vertices = None
     self.sorted_file_list = None
Beispiel #2
0
    def build_raw_csvs(self):
        """
        Parse the input arguments and extract one raw CSV per input image.

        Searches ``self.INPUT_DIR`` for WFR products, extracts the pixels inside
        ``self.ROI`` for each of them and writes one CSV per image into the
        ``self.CSV_N1`` ancillary folder.

        :return: list of absolute paths of the CSV files that were written.
        """
        self.log.info(f'Searching for WFR files inside: {self.INPUT_DIR}')
        self.log.info('Sorting input files by date.')
        self.sorted_file_list = self.build_list_from_subset(
            input_directory_path=self.INPUT_DIR)
        total = len(self.sorted_file_list)
        self.log.info(f'Input files found: {total}')
        # Guard: an empty input folder would raise ZeroDivisionError in the
        # progress computation inside the loop below. Bail out early instead.
        if total == 0:
            self.log.info('No input files found, nothing to do.')
            return []
        self.log.info('------')
        self.log.info(f'Generating ancillary data folder: {self.CSV_N1}')
        Path(self.CSV_N1).mkdir(parents=True, exist_ok=True)
        self.log.info(f'Attempting to extract geometries from: {self.ROI}')
        self.vertices = Utils.roi2vertex(roi=self.ROI,
                                         aux_folder_out=self.CSV_N1)

        t1 = time.perf_counter()
        done_csvs = []
        for n, img in enumerate(self.sorted_file_list):
            percent = int((n * 100) / total)
            # Acquisition date tag embedded in the Sentinel-3 product folder name.
            figdate = os.path.basename(img).split('____')[1].split('_')[0]
            self.log.info(f'({percent}%) {n + 1} of {total} - {figdate}')
            try:
                band_data, img_data = self.get_s3_data(wfr_img_folder=img,
                                                       vertices=self.vertices,
                                                       roi_file=self.ROI)
                f_b_name = os.path.basename(img).split('.')[0]
                out_dir = os.path.join(self.CSV_N1, f_b_name + '.csv')
                self.log.info(f'Saving DF at : {out_dir}')
                band_data.to_csv(out_dir, index=False)
                done_csvs.append(out_dir)
            except FileNotFoundError as e404:
                # If some Band.nc file was missing inside the image, move to the next one.
                self.log.info(f'{e404}')
                self.log.info(f'Skipping: {figdate}')
                continue

        t2 = time.perf_counter()
        outputstr = f'>>> Finished in {round(t2 - t1, 2)} second(s). <<<'
        self.log.info(outputstr)
        return done_csvs
Beispiel #3
0
    def build_single_csv(self, multiFileBridge=False):
        """
        Extract a single raw CSV from the one WFR image at ``self.INPUT_DIR``.

        :param multiFileBridge: when True, skip folder creation and ROI parsing
            (the caller is expected to have populated ``self.vertices`` already).
        :return: tuple of (band DataFrame, image metadata, [output CSV path]).
        """
        # TODO: build_single_csv should be called by build_raw_csvs for code recycling.
        if not multiFileBridge:
            self.log.info(f'Searching for WFR file inside: {self.INPUT_DIR}')
            self.log.info(f'Generating ancillary data folder: {self.CSV_N1}')
            Path(self.CSV_N1).mkdir(parents=True, exist_ok=True)
            self.log.info(f'Attempting to extract geometries from: {self.ROI}')
            self.vertices = Utils.roi2vertex(roi=self.ROI,
                                             aux_folder_out=self.CSV_N1)

        # TODO: https://xarray-spatial.org/reference/_autosummary/xrspatial.multispectral.true_color.html
        band_data, img_data = self.get_s3_data(wfr_img_folder=self.INPUT_DIR,
                                               vertices=self.vertices)

        base_name = os.path.basename(self.INPUT_DIR).split('.')[0]
        csv_path = os.path.join(self.CSV_N1, base_name + '.csv')
        self.log.info(f'Saving DF at : {csv_path}')
        band_data.to_csv(csv_path, index=False)
        return band_data, img_data, [csv_path]
Beispiel #4
0
def main():
    """
    Entry point for the SEN3R package. Call sen3r -h or --help to see further options.

    Parses the CLI arguments, validates the required ones (-i, -o, -r) and runs
    either single-image mode (-s) or the default batch mode over a directory.
    """

    # ,--------------,
    # | Start timers |--------------------------------------------------------------------------------------------------
    # '--------------'
    Utils.tic()
    t1 = time.perf_counter()

    parser = argparse.ArgumentParser(
        description='SEN3R (Sentinel-3 Reflectance Retrieval over Rivers) '
                    'enables extraction of reflectance time series from Sentinel-3 L2 WFR images over water bodies.')
    parser.add_argument("-i", "--input", help="The products input folder. Required.", type=str)
    parser.add_argument("-o", "--out", help="Output directory. Required.", type=str)
    parser.add_argument("-r", "--roi", help="Region of interest (SHP, KML or GeoJSON). Required", type=str)
    parser.add_argument("-p", "--product", help='Currently only WFR is available.', default='WFR', type=str)
    parser.add_argument("-c", "--cams", help="Path to search for auxiliary CAMS file. Optional.", type=str)
    parser.add_argument("-k", "--cluster", help="Which method to use for clustering. Optional.", default='M4', type=str)
    # parser.add_argument('-ng', '--no-graphics', help='Do not generate graphical reports.', action='store_true')
    # parser.add_argument('-np', '--no-pdf', help='Do not generate PDF report.', action='store_true')
    parser.add_argument("-s", "--single",
                        help="Single mode: run SEN3R over only one image instead of a whole directory."
                             " Optional.", action='store_true')
    parser.add_argument('-v', '--version', help='Displays current package version.', action='store_true')

    # ,--------------------------------------,
    # | STORE INPUT VARS INSIDE SEN3R OBJECT |--------------------------------------------------------------------------
    # '--------------------------------------'
    args = vars(parser.parse_args())  # Converts the input arguments from Namespace() to dict

    if args['version']:
        print(f'SEN3R version: {sen3r.__version__}')

    elif (args['input'] is None) or (args['out'] is None) or (args['roi'] is None):
        print('Please specify required INPUT/OUTPUT folders and REGION of interest (-i, -o, -r)')

    else:
        # ,------------,
        # | LOG SETUP  |------------------------------------------------------------------------------------------------
        # '------------'
        s3r = Core(args)  # Declare a SEN3R Core Object (sets up the log file internally)
        print(f'Starting SEN3R - LOG operations saved at:{s3r.arguments["logfile"]}')
        s3r.log.info(f'Starting SEN3R {s3r.VERSION} ({sen3r.__version__})')
        s3r.log.info('------')
        s3r.log.info('Input arguments:')
        for key, value in args.items():
            s3r.log.info(f'{key}: {value}')
        s3r.log.info('------')

        if args['single']:  # Single mode
            band_data, img_data, doneList = s3r.build_single_csv()

        else:  # Default mode: several images
            doneList = s3r.build_raw_csvs()
            print('cams_args:', s3r.arguments['cams'])
            if s3r.arguments["cams"]:
                s3r.process_csv_list(raw_csv_list=doneList, irmax=0.01, use_cams=True, k_method=s3r.arguments['cluster'])
            else:
                s3r.process_csv_list(raw_csv_list=doneList, k_method=s3r.arguments['cluster'])

    # ,------------------------------,
    # | End timers and report to log |----------------------------------------------------------------------------------
    # '------------------------------'
    t_hour, t_min, t_sec = Utils.tac()
    t2 = time.perf_counter()
    outputstr = f'Finished in {round(t2 - t1, 2)} second(s).'
    final_message = f'Elapsed execution time: {t_hour}h : {t_min}m : {t_sec}s'
    print(outputstr)
    print(final_message)
Beispiel #5
0
    def raw_report(self,
                   full_csv_path,
                   img_id_date,
                   raw_df,
                   filtered_df,
                   output_rprt_path=None):
        """
        This function will ingest RAW CSVs from S3-FRBR > outsourcing.py > GPTBridge.get_pixels_by_kml(), convert them
        into Pandas DataFrames, filter them and generate a PDF report.

        :param full_csv_path: path of the raw CSV (also used as the aux-figure root when no report path is given).
        :param img_id_date: image date tag used in figure titles and file names.
        :param raw_df: unfiltered pixel DataFrame.
        :param filtered_df: filtered pixel DataFrame.
        :param output_rprt_path: optional folder to save the PDF report into; when None, only the
            intermediate PNGs are written and no PDF is saved.
        :return: PIL image grid with the five report panels.

        # TODO: Update docstrings.
        """

        figdate = img_id_date
        df = raw_df
        fdf = filtered_df
        RAW_CSV = full_csv_path

        if output_rprt_path:
            aux_figs_path = os.path.join(output_rprt_path, 'aux_' + figdate)
            # Only computed when a report folder was given: joining with None
            # would raise TypeError (and the PDF is only saved in this case).
            svpt_report = os.path.join(output_rprt_path,
                                       'report_' + figdate + '.pdf')

        else:
            aux_figs_path = os.path.join(RAW_CSV, 'aux_' + figdate)

        os.mkdir(aux_figs_path)

        # Generating the saving path of the individual report images so we can fetch it later.
        svpt1 = os.path.join(aux_figs_path, 'a.png')
        svpt2 = os.path.join(aux_figs_path, 'b.png')
        svpt3 = os.path.join(aux_figs_path, 'c.png')
        svpt4 = os.path.join(aux_figs_path, 'd.png')
        svpt5 = os.path.join(aux_figs_path, 'e.png')

        # IMG A - Scatter MAP
        plt.rcParams['figure.figsize'] = self.rcparam
        fig = plt.figure()
        ax = plt.axes()
        ax.set_title(figdate, fontsize=16)
        sktmap = ax.scatter(df['longitude:double'],
                            df['latitude:double'],
                            c=df['T865:float'],
                            cmap='viridis',
                            s=3)
        cbar = fig.colorbar(sktmap, ax=ax)
        cbar.set_label('Aer. Optical Thickness (T865)')

        # NOTE(review): hard-coded lon/lat window — presumably a fixed study
        # area (looks like the central Amazon); confirm before reuse elsewhere.
        ax.set_xlim(-61.34, -60.46)
        ax.set_ylim(-3.65, -3.25)
        ax.set_xlabel('LON')
        ax.set_ylabel('LAT')

        plt.savefig(svpt1, dpi=self.imgdpi, bbox_inches='tight')

        # IMG B - RAW Scatter
        self.plot_sidebyside_sktr(
            x1_data=df['Oa08_reflectance:float'],
            y1_data=df['Oa17_reflectance:float'],
            x2_data=df['Oa08_reflectance:float'],
            y2_data=df['Oa17_reflectance:float'],
            x_lbl='RED: Oa08 (665nm)',
            y_lbl='NIR: Oa17 (865nm)',
            c1_data=df['A865:float'],
            c1_lbl='Aer. Angstrom Expoent (A865)',
            c2_data=df['T865:float'],
            c2_lbl='Aer. Optical Thickness (T865)',
            # title=f'MANACAPURU v6 WFR {figdate} RED:Oa08(665nm) x NIR:Oa17(865nm)',
            savepathname=svpt2)

        # IMG C - Filtered Scatter
        self.plot_sidebyside_sktr(
            x1_data=fdf['Oa08_reflectance:float'],
            y1_data=fdf['Oa17_reflectance:float'],
            x2_data=fdf['Oa08_reflectance:float'],
            y2_data=fdf['Oa17_reflectance:float'],
            x_lbl='RED: Oa08 (665nm)',
            y_lbl='NIR: Oa17 (865nm)',
            c1_data=fdf['A865:float'],
            c1_lbl='Aer. Angstrom Expoent (A865)',
            c2_data=fdf['T865:float'],
            c2_lbl='Aer. Optical Thickness (T865)',
            # title=f'MANACAPURU v6 WFR {figdate} RED:Oa08(665nm) x NIR:Oa17(865nm)',
            savepathname=svpt3)

        # IMG D - KD Histogram
        x = fdf['Oa08_reflectance:float'].copy()

        pk, xray, yray, kde_res = self.kde_local_maxima(x)

        self.plot_kde_histntable(xray=xray,
                                 yray=yray,
                                 x=x,
                                 kde_res=kde_res,
                                 pk=pk,
                                 svpath_n_title=svpt4)

        # IMG E - Reflectance
        self.s3l2_custom_reflectance_plot(
            df=fdf,
            # figure_title=figdate,
            save_title=svpt5)

        # Report
        images = [Image.open(x) for x in [svpt1, svpt2, svpt3, svpt4, svpt5]]
        report = Utils.pil_grid(images, 1)

        if output_rprt_path:
            report.save(svpt_report, resolution=100.0)

        plt.close('all')

        return report
Beispiel #6
0
    def process_csv_list(self,
                         raw_csv_list,
                         irmax=0.2,
                         use_cams=False,
                         do_clustering=True,
                         k_method='M4'):
        """
        Post-process the raw CSVs: filter by IR threshold, optionally use CAMS AOD and
        DBSCAN clustering, plot per-image figures, and export an Excel time series.

        :param k_method: key into dd.clustering_methods selecting the DBSCAN parameter set.
        :param do_clustering: when True, run DBSCAN and keep only the cluster closest to zero Oa21 reflectance.
        :param use_cams: when True, read the CAMS CSV given in self.arguments['cams'] for per-date AOD865.
        :param irmax: maximum allowed IR reflectance used by tsgen.update_csvs.
        :param raw_csv_list: [List] containing the absolute path to files extracted by self.get_s3_data
        :return:
        """
        # irmax = 0.001 # Negro
        # irmax = 0.08 # Fonte Boa
        # irmin = 0.001 # Manacapuru
        tsgen = TsGenerator(parent_log=self.log)

        # GET SERIES SAVE PATH # TODO: refactor
        excel_save_path = os.path.join(self.OUTPUT_DIR, 'sen3r.xlsx')
        out_dir = os.path.join(self.OUTPUT_DIR, 'CSV_N2')
        img_dir = os.path.join(self.OUTPUT_DIR, 'IMG')
        # img_save_pth = os.path.join(dest, station_name + f'_v{version}_img_dbscan')
        # series_save_pth = os.path.join(dest, station_name + f'_v{version}_img_dbscan_series')

        # CREATE THE DIRECTORIES IF THEY DON'T EXIST YET
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        Path(img_dir).mkdir(parents=True, exist_ok=True)
        # Path(img_save_pth).mkdir(parents=True, exist_ok=True)
        # Path(series_save_pth).mkdir(parents=True, exist_ok=True)

        # Start timer
        t1 = time.perf_counter()

        max_aot = False

        # Update RAW DFs
        total = len(raw_csv_list)

        if use_cams:
            # READ CAMS
            df_cams = pd.read_csv(self.arguments['cams'])
            df_cams['pydate'] = pd.to_datetime(df_cams['Datetime'])

        for n, img in enumerate(raw_csv_list):

            print(f'>>> Processing: {n + 1} of {total} ... {img}')
            self.log.info(f'>>> Processing: {n + 1} of {total} ... {img}')

            # Acquisition date tag embedded in the product file name.
            figdate = os.path.basename(img).split('____')[1].split('_')[0]
            figtitl = os.path.basename(out_dir) + '_' + figdate
            savpt_raw_sctr = os.path.join(img_dir, figdate + '_0.png')
            savpt_sctr = os.path.join(img_dir, figdate + '_1.png')
            savpt_rrs = os.path.join(img_dir, figdate + '_2.png')
            savpt_k = os.path.join(img_dir, figdate + '_3.png')

            if use_cams:
                # Find the equivalent observation day in CAMS (normalized to noon).
                dtlbl = datetime.strptime(figdate, '%Y%m%dT%H%M%S')
                dtlbl = dtlbl.replace(hour=12,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
                cams_row = df_cams[df_cams['pydate'] == dtlbl]
                # Bug fix: an empty match used to crash with IndexError on
                # .values[0]; check the row count before indexing.
                if len(cams_row) > 0 and cams_row['AOD865'].values[0]:
                    cams_val = cams_row['AOD865'].values[0]
                else:
                    # No match (or falsy AOD value) -> behave as if CAMS was absent.
                    cams_val = False

            else:
                cams_val = False

            # read and plot the raw CSV before any filtering
            rawDf = pd.read_csv(img, sep=',')
            tsgen.plot_sidebyside_sktr(
                x1_data=rawDf['Oa08_reflectance:float'],
                y1_data=rawDf['Oa17_reflectance:float'],
                x2_data=rawDf['Oa08_reflectance:float'],
                y2_data=rawDf['Oa17_reflectance:float'],
                x_lbl='RED: Oa08 (665nm)',
                y_lbl='NIR: Oa17 (865nm)',
                c1_data=rawDf['A865:float'],
                c1_lbl='Aer. Angstrom Expoent (A865)',
                c2_data=rawDf['T865:float'],
                c2_lbl='Aer. Optical Thickness (T865)',
                title=
                f'RAW {os.path.basename(out_dir)} WFR {figdate} RED:Oa08(665nm) x NIR:Oa17(865nm)',
                savepathname=savpt_raw_sctr)

            # reprocessing the raw CSVs and removing reflectances above the threshold in IR.
            try:
                dfpth, df = tsgen.update_csvs(
                    csv_path=img,
                    glint=20.0,
                    # ir_min_threshold=irmin,
                    ir_max_threshold=irmax,
                    savepath=out_dir,
                    max_aot=max_aot,
                    cams_val=cams_val)

            except Exception as e:
                print("type error: " + str(e))
                continue

            if len(df) < 1:
                print(f'Skipping empty CSV: {dfpth}')
                continue

            # ,--------------------,
            # | DBSCAN Clustering  |------------------------------------------------------------------------------------
            # '--------------------'
            if do_clustering:
                # Backup the DF before cleaning it with DBSCAN
                bkpdf = df.copy()

                # Apply DBSCAN (adds a 'cluster' column to df in place).
                tsgen.db_scan(df, dd.clustering_methods[k_method])

                # Plot and save the identified clusters
                tsgen.plot_scattercluster(df,
                                          col_x='Oa17_reflectance:float',
                                          col_y='Oa08_reflectance:float',
                                          col_color='T865:float',
                                          title=f'DBSCAN {figdate}',
                                          savepath=savpt_k)

                # Delete rows classified as noise:
                indexNames = df[df['cluster'] == -1].index
                df.drop(indexNames, inplace=True)

                if len(df) > 1:
                    # Keep only the cluster whose median Oa21 reflectance is closest to zero.
                    clusters = df.groupby(by='cluster').median()
                    k = Utils.find_nearest(clusters['Oa21_reflectance:float'],
                                           0)
                    # Delete rows from the other clusters:
                    indexNames = df[df['cluster'] != k].index
                    df.drop(indexNames, inplace=True)
                    # TODO : test cluster with the smallest T865 value as a primary/secondary rule.
                else:
                    # Clustering removed everything -> fall back to the pre-DBSCAN data.
                    df = bkpdf.copy()

            tsgen.plot_sidebyside_sktr(
                x1_data=df['Oa08_reflectance:float'],
                y1_data=df['Oa17_reflectance:float'],
                x2_data=df['Oa08_reflectance:float'],
                y2_data=df['Oa17_reflectance:float'],
                x_lbl='RED: Oa08 (665nm)',
                y_lbl='NIR: Oa17 (865nm)',
                c1_data=df['A865:float'],
                c1_lbl='Aer. Angstrom Expoent (A865)',
                c2_data=df['T865:float'],
                c2_lbl='Aer. Optical Thickness (T865)',
                title=
                f'{os.path.basename(out_dir)} WFR {figdate} RED:Oa08(665nm) x NIR:Oa17(865nm)',
                savepathname=savpt_sctr)

            tsgen.s3l2_custom_reflectance_plot(
                df=df,
                figure_title=f'{figdate}\n',
                c_lbl='Aer. Optical Thickness (T865)',
                save_title=savpt_rrs)

        print(f'Generating EXCEL output at: {excel_save_path}')
        self.log.info(f'Generating EXCEL output at: {excel_save_path}')

        # Generating excel file from the post-processed data
        wdir = out_dir
        todo = tsgen.build_list_from_subset(wdir)

        # Converting and saving the list of mean values into a XLS excel file.
        data = tsgen.generate_tms_data(wdir, todo)

        series_df = pd.DataFrame(data=data)
        # Delete these row indexes from dataFrame
        # indexNames = series_df[series_df['B17-865'] > irmax].index
        # indexNames = series_df[series_df['B17-865'] < irmin].index
        # series_df.drop(indexNames, inplace=True)

        # create empty excel
        wb = openpyxl.Workbook()
        wb.save(excel_save_path)

        # open the empty file and fill it up
        # NOTE(review): assigning writer.book is deprecated/removed in newer
        # pandas (>=1.5 / 2.x) — confirm the pinned pandas version before upgrading.
        book = openpyxl.load_workbook(excel_save_path)
        writer = pd.ExcelWriter(excel_save_path, engine='openpyxl')
        writer.book = book

        # Saving to Excel .xlsx
        series_df.to_excel(writer, sheet_name='wfr', index=False)
        writer.save()
        writer.close()

        # Custom paiting the cells
        # https://openpyxl.readthedocs.io/en/stable/_modules/openpyxl/styles/colors.html
        wb = openpyxl.load_workbook(excel_save_path)

        # Delete the empty sheet
        del wb['Sheet']

        # Get the sheet containing the final output
        ws = wb['wfr']

        # ARGB fills keyed by the MOD3R-style quality flag (0..3).
        mod3r_colors = {
            0: '00FFFFFF',
            1: '00008000',
            2: '00FE6000',
            3: '00FF0000'
        }

        for row in ws.iter_rows(min_row=2, min_col=None, max_col=None):
            # get the quality flag for the given row
            # NOTE(review): column index 42 is assumed to hold the quality flag
            # produced by generate_tms_data — verify against its output schema.
            flag_qlt = row[42]
            for cell in row:
                color_code = mod3r_colors[flag_qlt.value]
                cell.fill = PatternFill(start_color=color_code,
                                        end_color=color_code,
                                        fill_type="solid")

        wb.save(excel_save_path)

        t2 = time.perf_counter()
        outputstr = f'>>> Finished in {round(t2 - t1, 2)} second(s). <<<'
        print(outputstr)
        self.log.info(outputstr)
Beispiel #7
0
        else:  # Default mode: several images
            doneList = s3r.build_raw_csvs()
            print('cams_args:', s3r.arguments['cams'])
            if s3r.arguments["cams"]:
                s3r.process_csv_list(raw_csv_list=doneList, irmax=0.01, use_cams=True, k_method=s3r.arguments['cluster'])
            else:
                s3r.process_csv_list(raw_csv_list=doneList, k_method=s3r.arguments['cluster'])

    pass


if __name__ == '__main__':
    # ,--------------,
    # | Start timers |--------------------------------------------------------------------------------------------------
    # '--------------'
    Utils.tic()
    start = time.perf_counter()

    # ,-----,
    # | RUN |-----------------------------------------------------------------------------------------------------------
    # '-----'
    main()

    # ,------------------------------,
    # | End timers and report to log |----------------------------------------------------------------------------------
    # '------------------------------'
    t_hour, t_min, t_sec = Utils.tac()
    stop = time.perf_counter()
    # Report both the perf_counter delta and the Utils tic/tac breakdown.
    print(f'Finished in {round(stop - start, 2)} second(s).')
    print(f'Elapsed execution time: {t_hour}h : {t_min}m : {t_sec}s')