def main():
    config_filepaths = [
        Path("config/general.ini"),
        Path("config/local_overrule.ini"),
    ]
    year = 2018

    # Read the configuration files
    conf.read_config(config_filepaths)

    # Init logging
    base_log_dir = conf.dirs.getpath('log_dir')
    # Remark: don't prepend os.sep here: joining a Path with a string that
    # starts with a separator would discard base_log_dir
    log_dir = base_log_dir / f"calc_dias_weekly_{datetime.now():%Y-%m-%d_%H-%M-%S}"
    global logger
    logger = log_helper.main_log_init(log_dir, __name__)
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # Get the config needed
    timeseries_per_image_dir = conf.dirs.getpath('timeseries_per_image_dir')
    timeseries_periodic_dir = conf.dirs.getpath('timeseries_periodic_dir')

    # The input features file depends on the year
    if year == 2017:
        input_features_filename = "Prc_BEFL_2017_2019-06-14_bufm5.shp"
    elif year == 2018:
        input_features_filename = "Prc_BEFL_2018_2019-06-14_bufm5.shp"
    elif year == 2019:
        #input_features_filename = "Prc_BEFL_2019_2019-06-25_bufm5.shp"
        input_features_filename = "Prc_BEFL_2019_2019-07-02_bufm5.shp"
    else:
        raise Exception(f"Not a valid year: {year}")

    # Calculate!
    input_parcel_filepath = conf.dirs.getpath('input_dir') / input_features_filename
    ts_util.calculate_periodic_data(
        input_parcel_filepath=input_parcel_filepath,
        input_base_dir=timeseries_per_image_dir,
        start_date_str=f"{year}-03-15",
        end_date_str=f"{year}-08-15",
        #sensordata_to_get=conf.marker.getlist('sensordata_to_use'),
        sensordata_to_get=['SENSORDATA_S1_COHERENCE'],
        dest_data_dir=timeseries_periodic_dir,
        force=False)
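# Assumed entry point (not present in the original snippet) so the script can
# be run directly, e.g. `python calc_dias_weekly.py`:
if __name__ == "__main__":
    main()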
def calc_marker_task(config_filepaths: List[Path], default_basedir: Path):
    """
    Runs a marker using the settings in the config_filepaths.

    Args:
        config_filepaths (List[Path]): the config files to load.
        default_basedir (Path): the dir to resolve relative paths in the
            config files to.

    Raises:
        Exception: if a required input file doesn't exist.
    """
    # Read the configuration files
    conf.read_config(config_filepaths, default_basedir=default_basedir)

    # Create the run dir to be used for the results
    reuse_last_run_dir = conf.calc_marker_params.getboolean('reuse_last_run_dir')
    reuse_last_run_dir_config = conf.calc_marker_params.getboolean('reuse_last_run_dir_config')
    run_dir = dir_helper.create_run_dir(conf.dirs.getpath('marker_dir'), reuse_last_run_dir)
    if not run_dir.exists():
        os.makedirs(run_dir)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(run_dir, __name__)
    logger.info(f"Run dir with reuse_last_run_dir: {reuse_last_run_dir}, {run_dir}")
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # If running in conda, export the environment
    conda_env = os.environ.get('CONDA_DEFAULT_ENV')
    if conda_env is not None:
        environment_yml_path = run_dir / f"{conda_env}.yml"
        logger.info(f"Export conda environment used to {environment_yml_path}")
        os.system(f"conda env export > {environment_yml_path}")

    # If the config needs to be reused as well, load it; otherwise write it
    config_used_filepath = run_dir / 'config_used.ini'
    if (reuse_last_run_dir
            and reuse_last_run_dir_config
            and run_dir.exists()
            and config_used_filepath.exists()):
        config_filepaths.append(config_used_filepath)
        logger.info(f"Run dir config needs to be reused, so {config_filepaths}")
        conf.read_config(config_filepaths=config_filepaths, default_basedir=default_basedir)
        logger.info("Write new config_used.ini, because some parameters might have been added")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)
    else:
        # Copy the config files to a config dir for later reference
        configfiles_used_dir = run_dir / "configfiles_used"
        if configfiles_used_dir.exists():
            shutil.rmtree(configfiles_used_dir)
        configfiles_used_dir.mkdir()
        for config_filepath in config_filepaths:
            shutil.copy(config_filepath, configfiles_used_dir)

        # Write the resolved, complete config, so it can be reused
        logger.info("Write config_used.ini, so it can be reused later on")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

    # Read the info about the run
    input_parcel_filename = conf.calc_marker_params.getpath('input_parcel_filename')
    input_parcel_filetype = conf.calc_marker_params['input_parcel_filetype']
    country_code = conf.calc_marker_params['country_code']
    classes_refe_filename = conf.calc_marker_params.getpath('classes_refe_filename')
    input_groundtruth_filename = conf.calc_marker_params.getpath('input_groundtruth_filename')
    input_model_to_use_relativepath = conf.calc_marker_params.getpath('input_model_to_use_relativepath')

    # Prepare input filepaths
    if input_model_to_use_relativepath is not None:
        input_model_to_use_filepath = conf.dirs.getpath('model_dir') / input_model_to_use_relativepath
        if not input_model_to_use_filepath.exists():
            raise Exception(f"Input file input_model_to_use_filepath doesn't exist: "
                            f"{input_model_to_use_filepath}")
    else:
        input_model_to_use_filepath = None

    input_dir = conf.dirs.getpath('input_dir')
    input_parcel_filepath = input_dir / input_parcel_filename
    if input_groundtruth_filename is not None:
        input_groundtruth_filepath = input_dir / input_groundtruth_filename
    else:
        input_groundtruth_filepath = None

    refe_dir = conf.dirs.getpath('refe_dir')
    classes_refe_filepath = refe_dir / classes_refe_filename

    # Check if the necessary input files exist...
    for path in [classes_refe_filepath, input_parcel_filepath]:
        if path is not None and not path.exists():
            message = f"Input file doesn't exist, so STOP: {path}"
            logger.critical(message)
            raise Exception(message)

    # Get some general config
    data_ext = conf.general['data_ext']
    output_ext = conf.general['output_ext']
    geofile_ext = conf.general['geofile_ext']

    #-------------------------------------------------------------
    # The real work
    #-------------------------------------------------------------

    # STEP 1: prepare parcel data for classification and image data extraction
    #-------------------------------------------------------------

    # Prepare the input data for optimal image data extraction:
    #    1) apply a negative buffer on the parcel to evade mixels
    #    2) remove features that became null because of the buffer
    input_preprocessed_dir = conf.dirs.getpath('input_preprocessed_dir')
    buffer = conf.marker.getint('buffer')
    input_parcel_nogeo_filepath = input_preprocessed_dir / f"{input_parcel_filename.stem}{data_ext}"
    imagedata_input_parcel_filename = f"{input_parcel_filename.stem}_bufm{buffer}{geofile_ext}"
    imagedata_input_parcel_filepath = input_preprocessed_dir / imagedata_input_parcel_filename
    ts_util.prepare_input(
        input_parcel_filepath=input_parcel_filepath,
        output_imagedata_parcel_input_filepath=imagedata_input_parcel_filepath,
        output_parcel_nogeo_filepath=input_parcel_nogeo_filepath)

    # STEP 2: Get the timeseries data needed for the classification
    #-------------------------------------------------------------
    # Get the time series data (S1 and S2) to be used for the classification.
    # Result: the data is put in files in timeseries_periodic_dir, in one file
    # per date/period.
    timeseries_periodic_dir = conf.dirs.getpath('timeseries_periodic_dir')
    start_date_str = conf.marker['start_date_str']
    end_date_str = conf.marker['end_date_str']
    sensordata_to_use = conf.marker.getlist('sensordata_to_use')
    parceldata_aggregations_to_use = conf.marker.getlist('parceldata_aggregations_to_use')
    base_filename = f"{input_parcel_filename.stem}_bufm{buffer}_weekly"
    ts.calc_timeseries_data(
        input_parcel_filepath=imagedata_input_parcel_filepath,
        input_country_code=country_code,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_get=sensordata_to_use,
        base_filename=base_filename,
        dest_data_dir=timeseries_periodic_dir)

    # STEP 3: Preprocess all data needed for the classification
    #-------------------------------------------------------------
    # Prepare the basic input file with the classes that will be classified to.
    # Remarks:
    #    - this is typically specific for the input dataset and result wanted!
    #    - the result is/should be a file with the following columns:
    #        - id (=id_column): unique ID for each parcel
    #        - classname (=class_column): the class that must be classified to.
    #          Remarks: - if in classes_to_ignore_for_train, the class won't
    #                     be used for training
    #                   - if in classes_to_ignore, the class will be ignored
    #        - pixcount: the number of S1/S2 pixels in the parcel.
    #          Is -1 if the parcel doesn't have any S1/S2 data.
    classtype_to_prepare = conf.preprocess['classtype_to_prepare']
    parcel_filepath = run_dir / f"{input_parcel_filename.stem}_parcel{data_ext}"
    parcel_pixcount_filepath = timeseries_periodic_dir / f"{base_filename}_pixcount{data_ext}"
    class_pre.prepare_input(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        input_parcel_filetype=input_parcel_filetype,
        input_parcel_pixcount_filepath=parcel_pixcount_filepath,
        classtype_to_prepare=classtype_to_prepare,
        classes_refe_filepath=classes_refe_filepath,
        output_parcel_filepath=parcel_filepath)

    # Collect all data needed to do the classification in one input file
    parcel_classification_data_filepath = run_dir / f"{base_filename}_parcel_classdata{data_ext}"
    ts.collect_and_prepare_timeseries_data(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        timeseries_dir=timeseries_periodic_dir,
        base_filename=base_filename,
        output_filepath=parcel_classification_data_filepath,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_use=sensordata_to_use,
        parceldata_aggregations_to_use=parceldata_aggregations_to_use)

    # STEP 4: Train and test if necessary... and predict
    #-------------------------------------------------------------
    markertype = conf.marker.get('markertype')
    parcel_predictions_proba_all_filepath = run_dir / f"{base_filename}_predict_proba_all{data_ext}"
    classifier_ext = conf.classifier['classifier_ext']
    classifier_basefilepath = run_dir / f"{markertype}_01_mlp{classifier_ext}"

    # Check if a model exists already
    if input_model_to_use_filepath is None:
        best_model = mh.get_best_model(run_dir, acc_metric_mode='min')
        if best_model is not None:
            input_model_to_use_filepath = best_model['filepath']

    # If there is no model to use specified, train one!
    parcel_test_filepath = None
    parcel_predictions_proba_test_filepath = None
    if input_model_to_use_filepath is None:
        # Create the training sample...
        # Remark: this creates a list of representative test parcels + a list
        # of (candidate) training parcels
        balancing_strategy = conf.marker['balancing_strategy']
        parcel_train_filepath = run_dir / f"{base_filename}_parcel_train{data_ext}"
        parcel_test_filepath = run_dir / f"{base_filename}_parcel_test{data_ext}"
        class_pre.create_train_test_sample(
            input_parcel_filepath=parcel_filepath,
            output_parcel_train_filepath=parcel_train_filepath,
            output_parcel_test_filepath=parcel_test_filepath,
            balancing_strategy=balancing_strategy)

        # Train the classifier and output predictions
        parcel_predictions_proba_test_filepath = run_dir / f"{base_filename}_predict_proba_test{data_ext}"
        classification.train_test_predict(
            input_parcel_train_filepath=parcel_train_filepath,
            input_parcel_test_filepath=parcel_test_filepath,
            input_parcel_all_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            output_classifier_basefilepath=classifier_basefilepath,
            output_predictions_test_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_all_filepath=parcel_predictions_proba_all_filepath)
    else:
        # There is a classifier specified, so just use it!
        classification.predict(
            input_parcel_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            input_classifier_basefilepath=classifier_basefilepath,
            input_classifier_filepath=input_model_to_use_filepath,
            output_predictions_filepath=parcel_predictions_proba_all_filepath)

    # STEP 5: if necessary, do extra postprocessing
    #-------------------------------------------------------------
    '''if postprocess_to_groups is not None:
        # TODO
    '''

    # STEP 6: do the default, mandatory postprocessing
    #-------------------------------------------------------------
    # If it was necessary to train, there will be a test prediction...
    # so postprocess it
    parcel_predictions_test_filepath = None
    if (input_model_to_use_filepath is None
            and parcel_test_filepath is not None
            and parcel_predictions_proba_test_filepath is not None):
        parcel_predictions_test_filepath = run_dir / f"{base_filename}_predict_test{data_ext}"
        class_post.calc_top3_and_consolidation(
            input_parcel_filepath=parcel_test_filepath,
            input_parcel_probabilities_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_filepath=parcel_predictions_test_filepath)

    # Postprocess predictions
    parcel_predictions_all_filepath = run_dir / f"{base_filename}_predict_all{data_ext}"
    parcel_predictions_all_output_filepath = run_dir / f"{base_filename}_predict_all_output{output_ext}"
    class_post.calc_top3_and_consolidation(
        input_parcel_filepath=parcel_filepath,
        input_parcel_probabilities_filepath=parcel_predictions_proba_all_filepath,
        output_predictions_filepath=parcel_predictions_all_filepath,
        output_predictions_output_filepath=parcel_predictions_all_output_filepath)

    # STEP 7: Report on the accuracy, incl. ground truth
    #-------------------------------------------------------------
    # Preprocess the ground truth data, if it is provided
    groundtruth_filepath = None
    if input_groundtruth_filepath is not None:
        groundtruth_filepath = run_dir / (f"{input_groundtruth_filepath.stem}_classes"
                                          f"{input_groundtruth_filepath.suffix}")
        class_pre.prepare_input(
            input_parcel_filepath=input_groundtruth_filepath,
            input_parcel_filetype=input_parcel_filetype,
            input_parcel_pixcount_filepath=parcel_pixcount_filepath,
            classtype_to_prepare=conf.preprocess['classtype_to_prepare_groundtruth'],
            classes_refe_filepath=classes_refe_filepath,
            output_parcel_filepath=groundtruth_filepath)

    # If we trained a model, there is a test prediction we want to report on
    if input_model_to_use_filepath is None and parcel_predictions_test_filepath is not None:
        # Print full reporting on the accuracy of the test dataset
        report_txt = Path(f"{parcel_predictions_test_filepath}_accuracy_report.txt")
        class_report.write_full_report(
            parcel_predictions_filepath=parcel_predictions_test_filepath,
            output_report_txt=report_txt,
            parcel_ground_truth_filepath=groundtruth_filepath)

    # Print full reporting on the accuracy of the full dataset
    report_txt = Path(f"{parcel_predictions_all_filepath}_accuracy_report.txt")
    class_report.write_full_report(
        parcel_predictions_filepath=parcel_predictions_all_filepath,
        output_report_txt=report_txt,
        parcel_ground_truth_filepath=groundtruth_filepath)

    logging.shutdown()
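# Illustrative call (a sketch; the marker config file name "cropgroup.ini" is
# a placeholder, not taken from the original code):
#
#   calc_marker_task(
#       config_filepaths=[Path("config/general.ini"),
#                         Path("config/cropgroup.ini"),
#                         Path("config/local_overrule.ini")],
#       default_basedir=Path(__file__).resolve().parent)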
def main():
    # Read the configuration
    segment_config_filepaths = [Path('../config/general.ini')]
    conf.read_config(segment_config_filepaths)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(conf.dirs.getpath('log_dir'), __name__)
    logger.info("Start")
    logger.info(f"Config used: \n{conf.pformat_config()}")
    logger.info(pprint.pformat(dict(os.environ)))

    # Init variables
    #parcels_filepath = r"X:\GIS\GIS DATA\Percelen_ALP\Vlaanderen\Perc_VL_2019_2019-07-28\perc_2019_met_k_2019-07-28.shp"
    #overlap_filepath = r"X:\Monitoring\OrthoSeg\sealedsurfaces\output_vector\sealedsurfaces_10\sealedsurfaces_10_orig.gpkg"
    input_preprocessed_dir = conf.dirs.getpath('input_preprocessed_dir')
    parcels_filepath = input_preprocessed_dir / 'Prc_BEFL_2019_2019-07-02_bufm5_32632.gpkg'
    overlap_filepath = input_preprocessed_dir / 'Prc_BEFL_2019_2019-07-02_bufm5_32632.gpkg'

    # Read the parcels file to memory (isn't that large...)
    #parcels_gpd = geofile_helper.read_file(parcels_filepath)

    # Loop over the parcels and calculate the overlap
    logger.info(f"Connect to {overlap_filepath}")
    conn = sqlite3.connect(str(overlap_filepath))
    conn.enable_load_extension(True)

    # Now we can load the spatialite extension. Depending on your OS and
    # sqlite/spatialite version you might need to add '.so' (Linux) or
    # '.dll' (Windows) to the extension name.

    # mod_spatialite (recommended)
    #conn.execute("SELECT load_extension('spatialite.dll')")
    conn.load_extension('mod_spatialite')
    conn.execute('SELECT InitSpatialMetaData(1);')

    # libspatialite (alternative)
    #conn.execute('SELECT load_extension("libspatialite")')
    #conn.execute('SELECT InitSpatialMetaData();')

    c = conn.cursor()
    c.execute("SELECT sqlite_version()")
    for row in c:
        logger.info(f"sqlite version: {row}")

    c.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
    for row in c:
        logger.info(f"Table: {row}")

    c.execute(
        """SELECT t.uid, t.fid, MbrMinX(t.geom), ST_GeometryType(t.geom),
                  ST_AsText(GeomFromGPB(t.geom))
             FROM info t
             JOIN rtree_info_geom r ON t.fid = r.id
            WHERE r.minx >= 50000
              AND r.maxx <= 51000""")

    # Alternative queries that were tried:
    #   SELECT t.fid, ST_AsText(t.geom)
    #     FROM info t
    #     JOIN rtree_info_geom r ON t.fid = r.id
    #
    #   SELECT t.fid, AsText(t.geom)
    #     FROM "default" t
    #     JOIN rtree_default_geom r ON t.fid = r.id
    #    WHERE r.minx <= 200000 AND r.maxx >= 205000
    #      AND r.miny <= 200000 AND r.maxy >= 201000

    # Log the first 10 result rows
    for i, row in enumerate(c):
        logger.info(f"Result row: {row}")
        if i >= 10:
            break
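# The connection above is never closed explicitly. A safer variant (a sketch,
# not part of the original code; `query_tables` is a hypothetical helper name)
# wraps the connection in contextlib.closing so it is released even when a
# query raises:
import sqlite3
from contextlib import closing

def query_tables(overlap_filepath) -> list:
    """Lists the tables in the file, closing the connection afterwards."""
    with closing(sqlite3.connect(str(overlap_filepath))) as conn:
        conn.enable_load_extension(True)
        conn.load_extension('mod_spatialite')
        cursor = conn.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        return [row[0] for row in cursor]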
def run(markertype_to_calc: str,
        input_parcel_filename: str,
        input_parcel_filetype: str,
        country_code: str,
        year: int,
        input_groundtruth_filename: str,
        input_model_to_use_filepath: str):
    """
    Runs a marker for an input file. If no input model to use is specified,
    a new one will be trained.

    Args:
        markertype_to_calc (str): the type of marker to calculate; determines
            the marker config file that is loaded.
        input_parcel_filename (str): the filename of the input parcel file.
        input_parcel_filetype (str): the type of the input parcel file.
        country_code (str): the country code, used for the time series data.
        year (int): the year to run the marker for.
        input_groundtruth_filename (str): the filename of the ground truth
            file, or None if not available.
        input_model_to_use_filepath (str): the filepath of an existing model
            to use, or None to train a new one.
    """
    # If a model to use is specified, check if it exists...
    if (input_model_to_use_filepath is not None
            and not os.path.exists(input_model_to_use_filepath)):
        raise Exception(f"Input file input_model_to_use_filepath doesn't exist: "
                        f"{input_model_to_use_filepath}")

    # Determine the config files to load, depending on the marker_type
    marker_ini = f"config/{markertype_to_calc.lower()}.ini"
    config_filepaths = ["config/general.ini", marker_ini, "config/local_overrule.ini"]

    # Read the configuration files
    conf.read_config(config_filepaths, year=year)

    # Create the run dir to be used for the results
    reuse_last_run_dir = conf.dirs.getboolean('reuse_last_run_dir')
    reuse_last_run_dir_config = conf.dirs.getboolean('reuse_last_run_dir_config')
    run_dir = dir_helper.create_run_dir(conf.dirs['marker_base_dir'], reuse_last_run_dir)
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(run_dir, __name__)
    logger.info(f"Run dir with reuse_last_run_dir: {reuse_last_run_dir}, {run_dir}")
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # If the config needs to be reused as well, load it; otherwise write it
    config_used_filepath = os.path.join(run_dir, 'config_used.ini')
    if (reuse_last_run_dir
            and reuse_last_run_dir_config
            and os.path.exists(run_dir)
            and os.path.exists(config_used_filepath)):
        config_filepaths.append(config_used_filepath)
        logger.info(f"Run dir config needs to be reused, so {config_filepaths}")
        conf.read_config(config_filepaths=config_filepaths, year=year)
        logger.info("Write new config_used.ini, because some parameters might have been added")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)
    else:
        logger.info("Write config_used.ini, so it can be reused later on")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

    # Prepare input filepaths
    input_dir = conf.dirs['input_dir']
    input_parcel_filepath = os.path.join(input_dir, input_parcel_filename)
    if input_groundtruth_filename is not None:
        input_groundtruth_filepath = os.path.join(input_dir, input_groundtruth_filename)
    else:
        input_groundtruth_filepath = None

    # Check if the necessary input files exist...
    if not os.path.exists(input_parcel_filepath):
        message = f"The parcel input file doesn't exist, so STOP: {input_parcel_filepath}"
        logger.critical(message)
        raise Exception(message)

    # Get some general config
    data_ext = conf.general['data_ext']
    output_ext = conf.general['output_ext']
    geofile_ext = conf.general['geofile_ext']

    #-------------------------------------------------------------
    # The real work
    #-------------------------------------------------------------

    # STEP 1: prepare parcel data for classification and image data extraction
    #-------------------------------------------------------------

    # Prepare the input data for optimal image data extraction:
    #    1) apply a negative buffer on the parcel to evade mixels
    #    2) remove features that became null because of the buffer
    input_preprocessed_dir = conf.dirs['input_preprocessed_dir']
    input_parcel_filename_noext, _ = os.path.splitext(input_parcel_filename)
    buffer = conf.marker.getint('buffer')
    input_parcel_nogeo_filepath = os.path.join(
        input_preprocessed_dir, f"{input_parcel_filename_noext}{data_ext}")
    imagedata_input_parcel_filename_noext = f"{input_parcel_filename_noext}_bufm{buffer}"
    imagedata_input_parcel_filepath = os.path.join(
        input_preprocessed_dir, f"{imagedata_input_parcel_filename_noext}{geofile_ext}")
    ts_util.prepare_input(
        input_parcel_filepath=input_parcel_filepath,
        output_imagedata_parcel_input_filepath=imagedata_input_parcel_filepath,
        output_parcel_nogeo_filepath=input_parcel_nogeo_filepath)

    # STEP 2: Get the timeseries data needed for the classification
    #-------------------------------------------------------------
    # Get the time series data (S1 and S2) to be used for the classification.
    # Result: the data is put in files in timeseries_periodic_dir, in one file
    # per date/period.
    timeseries_periodic_dir = conf.dirs['timeseries_periodic_dir']
    start_date_str = conf.marker['start_date_str']
    end_date_str = conf.marker['end_date_str']
    sensordata_to_use = conf.marker.getlist('sensordata_to_use')
    parceldata_aggregations_to_use = conf.marker.getlist('parceldata_aggregations_to_use')
    base_filename = f"{input_parcel_filename_noext}_bufm{buffer}_weekly"
    ts.calc_timeseries_data(
        input_parcel_filepath=imagedata_input_parcel_filepath,
        input_country_code=country_code,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_get=sensordata_to_use,
        base_filename=base_filename,
        dest_data_dir=timeseries_periodic_dir)

    # STEP 3: Preprocess all data needed for the classification
    #-------------------------------------------------------------
    # Prepare the basic input file with the classes that will be classified to.
    # Remarks:
    #    - this is typically specific for the input dataset and result wanted!
    #    - the result is/should be a file with the following columns:
    #        - id (=global_settings.id_column): unique ID for each parcel
    #        - classname (=global_settings.class_column): the class that must
    #          be classified to.
    #          Remarks: - if in classes_to_ignore_for_train, the class won't
    #                     be used for training
    #                   - if in classes_to_ignore, the class will be ignored
    #        - pixcount (=global_settings.pixcount_s1s2_column):
    #          the number of S1/S2 pixels in the parcel.
    #          Is -1 if the parcel doesn't have any S1/S2 data.
    classtype_to_prepare = conf.preprocess['classtype_to_prepare']
    parcel_filepath = os.path.join(
        run_dir, f"{input_parcel_filename_noext}_parcel{data_ext}")
    parcel_pixcount_filepath = os.path.join(
        timeseries_periodic_dir, f"{base_filename}_pixcount{data_ext}")
    class_pre.prepare_input(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        input_parcel_filetype=input_parcel_filetype,
        input_parcel_pixcount_filepath=parcel_pixcount_filepath,
        classtype_to_prepare=classtype_to_prepare,
        output_parcel_filepath=parcel_filepath)

    # Collect all data needed to do the classification in one input file
    parcel_classification_data_filepath = os.path.join(
        run_dir, f"{base_filename}_parcel_classdata{data_ext}")
    ts.collect_and_prepare_timeseries_data(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        timeseries_dir=timeseries_periodic_dir,
        base_filename=base_filename,
        output_filepath=parcel_classification_data_filepath,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_use=sensordata_to_use,
        parceldata_aggregations_to_use=parceldata_aggregations_to_use)

    # STEP 4: Train and test if necessary... and predict
    #-------------------------------------------------------------
    parcel_predictions_proba_all_filepath = os.path.join(
        run_dir, f"{base_filename}_predict_proba_all{data_ext}")

    # If there is no model to use specified, train one!
    if input_model_to_use_filepath is None:
        # Create the training sample...
        # Remark: this creates a list of representative test parcels + a list
        # of (candidate) training parcels
        balancing_strategy = conf.marker['balancing_strategy']
        parcel_train_filepath = os.path.join(
            run_dir, f"{base_filename}_parcel_train{data_ext}")
        parcel_test_filepath = os.path.join(
            run_dir, f"{base_filename}_parcel_test{data_ext}")
        class_pre.create_train_test_sample(
            input_parcel_filepath=parcel_filepath,
            output_parcel_train_filepath=parcel_train_filepath,
            output_parcel_test_filepath=parcel_test_filepath,
            balancing_strategy=balancing_strategy)

        # Train the classifier and output predictions
        classifier_ext = conf.classifier['classifier_ext']
        classifier_filepath = os.path.splitext(parcel_train_filepath)[0] + f"_classifier{classifier_ext}"
        parcel_predictions_proba_test_filepath = os.path.join(
            run_dir, f"{base_filename}_predict_proba_test{data_ext}")
        classification.train_test_predict(
            input_parcel_train_filepath=parcel_train_filepath,
            input_parcel_test_filepath=parcel_test_filepath,
            input_parcel_all_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            output_classifier_filepath=classifier_filepath,
            output_predictions_test_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_all_filepath=parcel_predictions_proba_all_filepath)
    else:
        # There is a classifier specified, so just use it!
        classification.predict(
            input_parcel_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            input_classifier_filepath=input_model_to_use_filepath,
            output_predictions_filepath=parcel_predictions_proba_all_filepath)

    # STEP 5: if necessary, do extra postprocessing
    #-------------------------------------------------------------
    '''if postprocess_to_groups is not None:
        # TODO
    '''

    # STEP 6: do the default, mandatory postprocessing
    #-------------------------------------------------------------
    # If it was necessary to train, there will be a test prediction...
    # so postprocess it
    if input_model_to_use_filepath is None:
        parcel_predictions_test_filepath = os.path.join(
            run_dir, f"{base_filename}_predict_test{data_ext}")
        class_post.calc_top3_and_consolidation(
            input_parcel_filepath=parcel_test_filepath,
            input_parcel_probabilities_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_filepath=parcel_predictions_test_filepath)

    # Postprocess predictions
    parcel_predictions_all_filepath = os.path.join(
        run_dir, f"{base_filename}_predict_all{data_ext}")
    parcel_predictions_all_output_filepath = os.path.join(
        run_dir, f"{base_filename}_predict_all_output{output_ext}")
    class_post.calc_top3_and_consolidation(
        input_parcel_filepath=parcel_filepath,
        input_parcel_probabilities_filepath=parcel_predictions_proba_all_filepath,
        output_predictions_filepath=parcel_predictions_all_filepath,
        output_predictions_output_filepath=parcel_predictions_all_output_filepath)

    # STEP 7: Report on the accuracy, incl. ground truth
    #-------------------------------------------------------------
    # Preprocess the ground truth data, if it is provided
    groundtruth_filepath = None
    if input_groundtruth_filepath is not None:
        _, input_gt_filename = os.path.split(input_groundtruth_filepath)
        input_gt_filename_noext, input_gt_filename_ext = os.path.splitext(input_gt_filename)
        groundtruth_filepath = os.path.join(
            run_dir, f"{input_gt_filename_noext}_classes{input_gt_filename_ext}")
        class_pre.prepare_input(
            input_parcel_filepath=input_groundtruth_filepath,
            input_parcel_filetype=input_parcel_filetype,
            input_parcel_pixcount_filepath=parcel_pixcount_filepath,
            classtype_to_prepare=conf.preprocess['classtype_to_prepare_groundtruth'],
            output_parcel_filepath=groundtruth_filepath)

    # If we trained a model, there is a test prediction we want to report on
    if input_model_to_use_filepath is None:
        # Print full reporting on the accuracy of the test dataset
        report_txt = f"{parcel_predictions_test_filepath}_accuracy_report.txt"
        class_report.write_full_report(
            parcel_predictions_filepath=parcel_predictions_test_filepath,
            output_report_txt=report_txt,
            parcel_ground_truth_filepath=groundtruth_filepath)

    # Print full reporting on the accuracy of the full dataset
    report_txt = f"{parcel_predictions_all_filepath}_accuracy_report.txt"
    class_report.write_full_report(
        parcel_predictions_filepath=parcel_predictions_all_filepath,
        output_report_txt=report_txt,
        parcel_ground_truth_filepath=groundtruth_filepath)

    logging.shutdown()
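# Illustrative call (a sketch; the file names are placeholders, not taken from
# the original code):
#
#   run(markertype_to_calc="CROPGROUP",
#       input_parcel_filename="Prc_BEFL_2019.shp",
#       input_parcel_filetype="BEFL",
#       country_code="BEFL",
#       year=2019,
#       input_groundtruth_filename=None,
#       input_model_to_use_filepath=None)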
def main():
    # Determine the config files to load
    config_filepaths = [
        "../config/general.ini",
        "../config/local_overrule_linux.ini",
    ]
    test = False

    # Specify the date range
    years = [2018, 2019]
    month_start = 3
    month_stop = 8

    for year in years:
        # Read the configuration files
        conf.read_config(config_filepaths, year=year)

        # Get the general output dirs
        input_preprocessed_dir = conf.dirs['input_preprocessed_dir']
        timeseries_per_image_dir = conf.dirs['timeseries_per_image_dir']

        # Init logging
        if not test:
            base_log_dir = conf.dirs['log_dir']
        else:
            base_log_dir = conf.dirs['log_dir'] + '_test'
        log_dir = f"{base_log_dir}{os.sep}calc_dias_{datetime.now():%Y-%m-%d_%H-%M-%S}"

        # Clean the test log dir if it exists
        if test and os.path.exists(base_log_dir):
            shutil.rmtree(base_log_dir)

        global logger
        logger = log_helper.main_log_init(log_dir, __name__)
        logger.info(f"Config used: \n{conf.pformat_config()}")
        if test:
            logger.info(f"As we are testing, clean all test logging and use new log_dir: {log_dir}")

        # Write the consolidated config as an ini file to the run dir
        config_used_filepath = os.path.join(log_dir, 'config_used.ini')
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

        # The input features file depends on the year
        if year == 2017:
            input_features_filename = "Prc_BEFL_2017_2019-06-14_bufm5.shp"
        elif year == 2018:
            input_features_filename = "Prc_BEFL_2018_2019-06-14_bufm5.shp"
        elif year == 2019:
            #input_features_filename = "Prc_BEFL_2019_2019-06-25_bufm5.shp"
            input_features_filename = "Prc_BEFL_2019_2019-07-02_bufm5.shp"
        else:
            raise Exception(f"Not a valid year: {year}")
        input_features_filepath = os.path.join(input_preprocessed_dir, input_features_filename)

        # Init the output dir
        if not test:
            output_basedir = timeseries_per_image_dir
        else:
            output_basedir = timeseries_per_image_dir + '_test'
            logger.info(f"As we are testing, use test output basedir: {output_basedir}")
        input_features_filename_noext = os.path.splitext(input_features_filename)[0]
        output_dir = os.path.join(output_basedir, input_features_filename_noext)
        if test:
            if os.path.exists(output_dir):
                logger.info(f"As we are only testing, clean the output dir: {output_dir}")
                # By adding a separator at the end, only the contents are
                # recursively deleted
                shutil.rmtree(output_dir + os.sep)

        # Temp dir + clean its contents
        temp_dir = conf.dirs['temp_dir'] + os.sep + 'calc_dias'
        logger.info(f"Clean the temp dir {temp_dir}")
        if os.path.exists(temp_dir):
            # By adding a separator at the end, only the contents are
            # recursively deleted
            shutil.rmtree(temp_dir + os.sep)

        """ # TEST to extract the exact footprint from an S1 image...
filepath = "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2017/01/01/S1A_IW_GRDH_1SDV_20170101T055005_20170101T055030_014634_017CB9_Orb_RBN_RTN_Cal_TC.CARD/S1A_IW_GRDH_1SDV_20170101T055005_20170101T055030_014634_017CB9_Orb_RBN_RTN_Cal_TC.data/Gamma0_VH.img" image = rasterio.open(filepath) geoms = list(rasterio.features.dataset_features(src=image, as_mask=True, precision=5)) footprint = gpd.GeoDataFrame.from_features(geoms) logger.info(footprint) footprint = footprint.simplify(0.00001) logger.info(footprint) logger.info("Ready") # Start calculation """ ##### Process S1 GRD images ##### input_image_filepaths = [] for i in range(month_start, month_stop + 1): input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S1*/L1TC/{year}/{i:02d}/*/*.CARD" input_image_filepaths.extend(glob.glob(input_image_searchstr)) logger.info( f"Found {len(input_image_filepaths)} S1 GRD images to process") if test: # Take only the x first images found while testing #input_image_filepaths = input_image_filepaths[:10] input_image_filepaths = [] input_image_filepaths.append( "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2018/04/09/S1A_IW_GRDH_1SDV_20180409T054153_20180409T054218_021386_024D13_D824_Orb_RBN_RTN_Cal_TC_20190612T171437.L1TC.CARD" ) input_image_filepaths.append( "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2018/04/22/S1A_IW_GRDH_1SDV_20180422T173236_20180422T173301_021583_025328_99D1_Orb_RBN_RTN_Cal_TC_20190612T171441.L1TC.CARD" ) logger.info( f"As we are only testing, process only {len(input_image_filepaths)} test images" ) calc_ts.calc_stats_per_image(features_filepath=input_features_filepath, id_column=conf.columns['id'], image_paths=input_image_filepaths, bands=['VV', 'VH'], output_dir=output_dir, temp_dir=temp_dir, log_dir=log_dir) ##### Process S2 images ##### input_image_filepaths = [] for i in range(month_start, month_stop + 1): input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S2*/L2A/{year}/{i:02d}/*/*.SAFE" input_image_filepaths.extend(glob.glob(input_image_searchstr)) logger.info(f"Found {len(input_image_filepaths)} S2 images to process") if test: # Take only the x first images found while testing input_image_filepaths = input_image_filepaths[:10] logger.info( f"As we are only testing, process only {len(input_image_filepaths)} test images" ) # TODO: refactor underlying code so the SCL band is used regardless of it being passed here max_cloudcover_pct = conf.timeseries.getfloat('max_cloudcover_pct') calc_ts.calc_stats_per_image( features_filepath=input_features_filepath, id_column=conf.columns['id'], image_paths=input_image_filepaths, bands=['B02-10m', 'B03-10m', 'B04-10m', 'B08-10m', 'SCL-20m'], output_dir=output_dir, temp_dir=temp_dir, log_dir=log_dir, max_cloudcover_pct=max_cloudcover_pct) ##### Process S1 Coherence images ##### input_image_filepaths = [] for i in range(month_start, month_stop + 1): input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S1*/L1CO/{year}/{i:02d}/*/*.CARD" input_image_filepaths.extend(glob.glob(input_image_searchstr)) logger.info( f"Found {len(input_image_filepaths)} S1 Coherence images to process" ) if test: # Take only the x first images found while testing input_image_filepaths = input_image_filepaths[:10] logger.info( f"As we are only testing, process only {len(input_image_filepaths)} test images" ) calc_ts.calc_stats_per_image(features_filepath=input_features_filepath, id_column=conf.columns['id'], image_paths=input_image_filepaths, bands=['VV', 'VH'], output_dir=output_dir, temp_dir=temp_dir, log_dir=log_dir)