Example #1
def main():
    # Read the configuration
    conf.read_config([Path('config/general.ini')])

    # Go!
    timeseries_periodic_dir = conf.dirs.getpath('timeseries_periodic_dir')
    ts_calc_gee.clean_gee_downloaded_csvs_in_dir(str(timeseries_periodic_dir))
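The conf.dirs.getpath(...) accessor used above looks like configparser's converter mechanism. A minimal sketch, assuming the config helper works roughly this way (the real conf module may differ): registering custom converters gives every section proxy a getpath() / getlist() method.

# Sketch only: an assumed reimplementation of the getpath()/getlist() accessors,
# not the project's actual config helper.
import configparser
from pathlib import Path

config = configparser.ConfigParser(
    interpolation=configparser.ExtendedInterpolation(),
    converters={
        'path': Path,
        'list': lambda value: [item.strip() for item in value.split(',')],
    })
config.read_string("""
[dirs]
timeseries_periodic_dir = /data/timeseries_periodic
""")
print(config['dirs'].getpath('timeseries_periodic_dir'))  # Path('/data/timeseries_periodic')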
Example #2
def main():
    # Read the configuration
    conf.read_config(['config/general.ini', 'config/local_overrule.ini'],
                     year=None)

    # Go!
    timeseries_periodic_dir = conf.dirs['timeseries_periodic_dir']
    ts_calc_gee.clean_gee_downloaded_csvs_in_dir(timeseries_periodic_dir)
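Example #1 reads a single config file, while Example #2 layers config/local_overrule.ini on top of config/general.ini. With configparser, which conf.read_config presumably wraps, files read later override values read earlier. A minimal sketch of that behaviour, with illustrative section and key names:

# Minimal sketch of configparser's override order; the [dirs]/data_dir names
# here are illustrative, not the project's real config keys.
import configparser

general_ini = "[dirs]\ndata_dir = /data\n"
local_overrule_ini = "[dirs]\ndata_dir = /local/data\n"

config = configparser.ConfigParser()
config.read_string(general_ini)
config.read_string(local_overrule_ini)  # read later, so it overrules general_ini
print(config['dirs']['data_dir'])       # -> /local/data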
Example #3
def main():

    config_filepaths = [
        Path("config/general.ini"),
        Path("config/local_overrule.ini")
    ]
    year = 2018

    # Read the configuration files
    conf.read_config(config_filepaths)

    # Init logging
    base_log_dir = conf.dirs.getpath('log_dir')
    log_dir = base_log_dir / f"calc_dias_weekly_{datetime.now():%Y-%m-%d_%H-%M-%S}"
    global logger
    logger = log_helper.main_log_init(log_dir, __name__)
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # Get the config needed
    timeseries_per_image_dir = conf.dirs.getpath('timeseries_per_image_dir')
    timeseries_periodic_dir = conf.dirs.getpath('timeseries_periodic_dir')

    # Input features file depends on the year
    if year == 2017:
        input_features_filename = "Prc_BEFL_2017_2019-06-14_bufm5.shp"
    elif year == 2018:
        input_features_filename = "Prc_BEFL_2018_2019-06-14_bufm5.shp"
    elif year == 2019:
        #input_features_filename = "Prc_BEFL_2019_2019-06-25_bufm5.shp"
        input_features_filename = "Prc_BEFL_2019_2019-07-02_bufm5.shp"
    else:
        raise Exception(f"Not a valid year: {year}")

    # Calculate!
    input_parcel_filepath = conf.dirs.getpath('input_dir') / input_features_filename
    ts_util.calculate_periodic_data(
        input_parcel_filepath=input_parcel_filepath,
        input_base_dir=timeseries_per_image_dir,
        start_date_str=f"{year}-03-15",
        end_date_str=f"{year}-08-15",
        #sensordata_to_get=conf.marker.getlist('sensordata_to_use'),
        sensordata_to_get=['SENSORDATA_S1_COHERENCE'],
        dest_data_dir=timeseries_periodic_dir,
        force=False)
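The year-to-filename selection in this example (and again in Example #8) could also be written as a lookup table; a small refactor sketch using the same filenames:

# Refactor sketch: the same year -> input features filename selection,
# expressed as a lookup table instead of an if/elif chain.
input_features_filenames = {
    2017: "Prc_BEFL_2017_2019-06-14_bufm5.shp",
    2018: "Prc_BEFL_2018_2019-06-14_bufm5.shp",
    2019: "Prc_BEFL_2019_2019-07-02_bufm5.shp",
}

year = 2018
if year not in input_features_filenames:
    raise Exception(f"Not a valid year: {year}")
input_features_filename = input_features_filenames[year]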
Example #4
def run_jobs():
    
    # First read the general config to get the job and models dir
    conf.read_config(
            config_filepaths=['config/general.ini', 'config/local_overrule.ini'],
            year=-1)
    jobs_dir = conf.dirs['job_dir']
    models_dir = conf.dirs['model_dir']
    
    # Get the jobs and treat them
    job_filepaths = sorted(glob.glob(os.path.join(jobs_dir, "*.ini")))
    for job_filepath in job_filepaths:      
        # Create configparser and read job file!
        job_config = configparser.ConfigParser(
                interpolation=configparser.ExtendedInterpolation(),
                allow_no_value=True)
        job_config.read(job_filepath)

        # Now get the info we want from the job config
        markertype_to_calc = job_config['job']['markertype_to_calc']
        input_parcel_filename = job_config['job']['input_parcel_filename']
        input_parcel_filetype = job_config['job']['input_parcel_filetype']
        year = job_config['job'].getint('year')
        country_code = job_config['job']['country_code']
        input_groundtruth_filename = job_config['job']['input_groundtruth_filename']
        input_model_to_use_relativepath = job_config['job']['input_model_to_use_relativepath']
        if input_model_to_use_relativepath is not None:
            input_model_to_use_filepath = os.path.join(models_dir, input_model_to_use_relativepath)
        else:
            input_model_to_use_filepath = None

        # Run!
        marker_runner.run(
                markertype_to_calc=markertype_to_calc,
                input_parcel_filename=input_parcel_filename,
                input_parcel_filetype=input_parcel_filetype,
                country_code=country_code,
                year=year,
                input_groundtruth_filename=input_groundtruth_filename,
                input_model_to_use_filepath=input_model_to_use_filepath)
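run_jobs() expects each *.ini file in the jobs dir to have a [job] section with the keys read above. A hypothetical job file (the values are invented for illustration) also shows why allow_no_value=True matters: a key listed without a value comes back as None.

# Hypothetical job file: only the keys come from run_jobs() above, the values
# are invented for illustration.
import configparser

job_ini = """
[job]
markertype_to_calc = CROPGROUP
input_parcel_filename = Prc_BEFL_2018.shp
input_parcel_filetype = BEFL
year = 2018
country_code = BEFL
input_groundtruth_filename = groundtruth_2018.shp
input_model_to_use_relativepath
"""

job_config = configparser.ConfigParser(
        interpolation=configparser.ExtendedInterpolation(),
        allow_no_value=True)
job_config.read_string(job_ini)
print(job_config['job'].getint('year'))                      # -> 2018
print(job_config['job']['input_model_to_use_relativepath'])  # -> None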
Example #5
def calc_marker_task(config_filepaths: List[Path], default_basedir: Path):
    """
    Runs a marker using the setting in the config_filepaths.
    
    Args:
        config_filepaths (List[Path]): the config files to load
        default_basedir (Path): the dir to resolve relative paths in the config
            file to.
    
    Raises:
        Exception: [description]
        Exception: [description]
    """
    # Read the configuration files
    conf.read_config(config_filepaths, default_basedir=default_basedir)

    # Create run dir to be used for the results
    reuse_last_run_dir = conf.calc_marker_params.getboolean(
        'reuse_last_run_dir')
    reuse_last_run_dir_config = conf.calc_marker_params.getboolean(
        'reuse_last_run_dir_config')
    run_dir = dir_helper.create_run_dir(conf.dirs.getpath('marker_dir'),
                                        reuse_last_run_dir)
    if not run_dir.exists():
        os.makedirs(run_dir)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(run_dir, __name__)
    logger.info(
        f"Run dir with reuse_last_run_dir: {reuse_last_run_dir}, {run_dir}")
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # If running in conda, export the environment
    conda_env = os.environ.get('CONDA_DEFAULT_ENV')
    if conda_env is not None:
        environment_yml_path = run_dir / f"{conda_env}.yml"
        logger.info(f"Export conda environment used to {environment_yml_path}")
        os.system(f"conda env export > {environment_yml_path}")

    # If the config needs to be reused as well, load it, else write it
    config_used_filepath = run_dir / 'config_used.ini'
    if (reuse_last_run_dir and reuse_last_run_dir_config and run_dir.exists()
            and config_used_filepath.exists()):
        config_filepaths.append(config_used_filepath)
        logger.info(
            f"Run dir config needs to be reused, so {config_filepaths}")
        conf.read_config(config_filepaths=config_filepaths,
                         default_basedir=default_basedir)
        logger.info(
            "Write new config_used.ini, because some parameters might have been added"
        )
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)
    else:
        # Copy the config files to a config dir for later notice
        configfiles_used_dir = run_dir / "configfiles_used"
        if configfiles_used_dir.exists():
            shutil.rmtree(configfiles_used_dir)
        configfiles_used_dir.mkdir()
        for config_filepath in config_filepaths:
            shutil.copy(config_filepath, configfiles_used_dir)

        # Write the resolved complete config, so it can be reused
        logger.info("Write config_used.ini, so it can be reused later on")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

    # Read the info about the run
    input_parcel_filename = conf.calc_marker_params.getpath(
        'input_parcel_filename')
    input_parcel_filetype = conf.calc_marker_params['input_parcel_filetype']
    country_code = conf.calc_marker_params['country_code']
    classes_refe_filename = conf.calc_marker_params.getpath(
        'classes_refe_filename')
    input_groundtruth_filename = conf.calc_marker_params.getpath(
        'input_groundtruth_filename')
    input_model_to_use_relativepath = conf.calc_marker_params.getpath(
        'input_model_to_use_relativepath')

    # Prepare input filepaths
    if input_model_to_use_relativepath is not None:
        input_model_to_use_filepath = conf.dirs.getpath(
            'model_dir') / input_model_to_use_relativepath
        if not input_model_to_use_filepath.exists():
            raise Exception(
                f"Input file input_model_to_use_filepath doesn't exist: {input_model_to_use_filepath}"
            )
    else:
        input_model_to_use_filepath = None

    input_dir = conf.dirs.getpath('input_dir')
    input_parcel_filepath = input_dir / input_parcel_filename
    if input_groundtruth_filename is not None:
        input_groundtruth_filepath = input_dir / input_groundtruth_filename
    else:
        input_groundtruth_filepath = None

    refe_dir = conf.dirs.getpath('refe_dir')
    classes_refe_filepath = refe_dir / classes_refe_filename

    # Check if the necessary input files exist...
    for path in [classes_refe_filepath, input_parcel_filepath]:
        if path is not None and not path.exists():
            message = f"Input file doesn't exist, so STOP: {path}"
            logger.critical(message)
            raise Exception(message)

    # Get some general config
    data_ext = conf.general['data_ext']
    output_ext = conf.general['output_ext']
    geofile_ext = conf.general['geofile_ext']

    #-------------------------------------------------------------
    # The real work
    #-------------------------------------------------------------
    # STEP 1: prepare parcel data for classification and image data extraction
    #-------------------------------------------------------------

    # Prepare the input data for optimal image data extraction:
    #    1) apply a negative buffer on the parcel to evade mixels
    #    2) remove features that became null because of buffer
    input_preprocessed_dir = conf.dirs.getpath('input_preprocessed_dir')
    buffer = conf.marker.getint('buffer')
    input_parcel_nogeo_filepath = input_preprocessed_dir / f"{input_parcel_filename.stem}{data_ext}"
    imagedata_input_parcel_filename = f"{input_parcel_filename.stem}_bufm{buffer}{geofile_ext}"
    imagedata_input_parcel_filepath = input_preprocessed_dir / imagedata_input_parcel_filename
    ts_util.prepare_input(
        input_parcel_filepath=input_parcel_filepath,
        output_imagedata_parcel_input_filepath=imagedata_input_parcel_filepath,
        output_parcel_nogeo_filepath=input_parcel_nogeo_filepath)

    # STEP 2: Get the timeseries data needed for the classification
    #-------------------------------------------------------------
    # Get the time series data (S1 and S2) to be used for the classification
    # Result: data is put in files in timeseries_periodic_dir, in one file per
    #         date/period
    timeseries_periodic_dir = conf.dirs.getpath('timeseries_periodic_dir')
    start_date_str = conf.marker['start_date_str']
    end_date_str = conf.marker['end_date_str']
    sensordata_to_use = conf.marker.getlist('sensordata_to_use')
    parceldata_aggregations_to_use = conf.marker.getlist(
        'parceldata_aggregations_to_use')
    base_filename = f"{input_parcel_filename.stem}_bufm{buffer}_weekly"
    ts.calc_timeseries_data(
        input_parcel_filepath=imagedata_input_parcel_filepath,
        input_country_code=country_code,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_get=sensordata_to_use,
        base_filename=base_filename,
        dest_data_dir=timeseries_periodic_dir)

    # STEP 3: Preprocess all data needed for the classification
    #-------------------------------------------------------------
    # Prepare the basic input file with the classes that will be classified to.
    # Remarks:
    #    - this is typically specific for the input dataset and result wanted!!!
    #    - the result is/should be a file with the following columns
    #           - id (=id_column): unique ID for each parcel
    #           - classname (=class_column): the class that must
    #             be classified to.
    #             Remarks: - if in classes_to_ignore_for_train, class won't be used for training
    #                      - if in classes_to_ignore, the class will be ignored
    #           - pixcount:
    #             the number of S1/S2 pixels in the parcel.
    #             Is -1 if the parcel doesn't have any S1/S2 data.
    classtype_to_prepare = conf.preprocess['classtype_to_prepare']
    parcel_filepath = run_dir / f"{input_parcel_filename.stem}_parcel{data_ext}"
    parcel_pixcount_filepath = timeseries_periodic_dir / f"{base_filename}_pixcount{data_ext}"
    class_pre.prepare_input(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        input_parcel_filetype=input_parcel_filetype,
        input_parcel_pixcount_filepath=parcel_pixcount_filepath,
        classtype_to_prepare=classtype_to_prepare,
        classes_refe_filepath=classes_refe_filepath,
        output_parcel_filepath=parcel_filepath)

    # Collect all data needed to do the classification in one input file
    parcel_classification_data_filepath = run_dir / f"{base_filename}_parcel_classdata{data_ext}"
    ts.collect_and_prepare_timeseries_data(
        input_parcel_filepath=input_parcel_nogeo_filepath,
        timeseries_dir=timeseries_periodic_dir,
        base_filename=base_filename,
        output_filepath=parcel_classification_data_filepath,
        start_date_str=start_date_str,
        end_date_str=end_date_str,
        sensordata_to_use=sensordata_to_use,
        parceldata_aggregations_to_use=parceldata_aggregations_to_use)

    # STEP 4: Train and test if necessary... and predict
    #-------------------------------------------------------------
    markertype = conf.marker.get('markertype')
    parcel_predictions_proba_all_filepath = run_dir / f"{base_filename}_predict_proba_all{data_ext}"
    classifier_ext = conf.classifier['classifier_ext']
    classifier_basefilepath = run_dir / f"{markertype}_01_mlp{classifier_ext}"

    # Check if a model exists already
    if input_model_to_use_filepath is None:
        best_model = mh.get_best_model(run_dir, acc_metric_mode='min')
        if best_model is not None:
            input_model_to_use_filepath = best_model['filepath']

    # if there is no model to use specified, train one!
    parcel_test_filepath = None
    parcel_predictions_proba_test_filepath = None
    if input_model_to_use_filepath is None:

        # Create the training sample...
        # Remark: this creates a list of representative test parcel + a list of (candidate) training parcel
        balancing_strategy = conf.marker['balancing_strategy']
        parcel_train_filepath = run_dir / f"{base_filename}_parcel_train{data_ext}"
        parcel_test_filepath = run_dir / f"{base_filename}_parcel_test{data_ext}"
        class_pre.create_train_test_sample(
            input_parcel_filepath=parcel_filepath,
            output_parcel_train_filepath=parcel_train_filepath,
            output_parcel_test_filepath=parcel_test_filepath,
            balancing_strategy=balancing_strategy)

        # Train the classifier and output predictions
        parcel_predictions_proba_test_filepath = run_dir / f"{base_filename}_predict_proba_test{data_ext}"
        classification.train_test_predict(
            input_parcel_train_filepath=parcel_train_filepath,
            input_parcel_test_filepath=parcel_test_filepath,
            input_parcel_all_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            output_classifier_basefilepath=classifier_basefilepath,
            output_predictions_test_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_all_filepath=parcel_predictions_proba_all_filepath)
    else:
        # there is a classifier specified, so just use it!
        classification.predict(
            input_parcel_filepath=parcel_filepath,
            input_parcel_classification_data_filepath=parcel_classification_data_filepath,
            input_classifier_basefilepath=classifier_basefilepath,
            input_classifier_filepath=input_model_to_use_filepath,
            output_predictions_filepath=parcel_predictions_proba_all_filepath)

    # STEP 5: if necessary, do extra postprocessing
    #-------------------------------------------------------------
    '''if postprocess_to_groups is not None:
        # TODO 
    '''

    # STEP 6: do the default, mandatory postprocessing
    #-------------------------------------------------------------
    # If it was necessary to train, there will be a test prediction... so postprocess it
    parcel_predictions_test_filepath = None
    if (input_model_to_use_filepath is None
            and parcel_test_filepath is not None
            and parcel_predictions_proba_test_filepath is not None):
        parcel_predictions_test_filepath = run_dir / f"{base_filename}_predict_test{data_ext}"
        class_post.calc_top3_and_consolidation(
            input_parcel_filepath=parcel_test_filepath,
            input_parcel_probabilities_filepath=parcel_predictions_proba_test_filepath,
            output_predictions_filepath=parcel_predictions_test_filepath)

    # Postprocess predictions
    parcel_predictions_all_filepath = run_dir / f"{base_filename}_predict_all{data_ext}"
    parcel_predictions_all_output_filepath = run_dir / f"{base_filename}_predict_all_output{output_ext}"
    class_post.calc_top3_and_consolidation(
        input_parcel_filepath=parcel_filepath,
        input_parcel_probabilities_filepath=parcel_predictions_proba_all_filepath,
        output_predictions_filepath=parcel_predictions_all_filepath,
        output_predictions_output_filepath=parcel_predictions_all_output_filepath)

    # STEP 7: Report on the accuracy, incl. ground truth
    #-------------------------------------------------------------
    # Preprocess the ground truth data if it is provided
    groundtruth_filepath = None
    if input_groundtruth_filepath is not None:
        groundtruth_filepath = run_dir / f"{input_groundtruth_filepath.stem}_classes{input_groundtruth_filepath.suffix}"
        class_pre.prepare_input(
            input_parcel_filepath=input_groundtruth_filepath,
            input_parcel_filetype=input_parcel_filetype,
            input_parcel_pixcount_filepath=parcel_pixcount_filepath,
            classtype_to_prepare=conf.preprocess['classtype_to_prepare_groundtruth'],
            classes_refe_filepath=classes_refe_filepath,
            output_parcel_filepath=groundtruth_filepath)

    # If we trained a model, there is a test prediction we want to report on
    if input_model_to_use_filepath is None and parcel_predictions_test_filepath is not None:
        # Print full reporting on the accuracy of the test dataset
        report_txt = Path(
            f"{str(parcel_predictions_test_filepath)}_accuracy_report.txt")
        class_report.write_full_report(
            parcel_predictions_filepath=parcel_predictions_test_filepath,
            output_report_txt=report_txt,
            parcel_ground_truth_filepath=groundtruth_filepath)

    # Print full reporting on the accuracy of the full dataset
    report_txt = Path(
        f"{str(parcel_predictions_all_filepath)}_accuracy_report.txt")
    class_report.write_full_report(
        parcel_predictions_filepath=parcel_predictions_all_filepath,
        output_report_txt=report_txt,
        parcel_ground_truth_filepath=groundtruth_filepath)

    logging.shutdown()
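The conda environment export in this example uses os.system with a shell redirect, which silently ignores failures. A possible alternative sketch, assuming the same result is wanted, that captures the export via subprocess so the exit code and stderr are visible (here written to the current dir rather than run_dir):

# Alternative sketch for the conda export step above, using subprocess instead
# of os.system so failures aren't silently swallowed.
import os
import shutil
import subprocess
from pathlib import Path

conda_env = os.environ.get('CONDA_DEFAULT_ENV')
if conda_env is not None and shutil.which("conda") is not None:
    environment_yml_path = Path(f"{conda_env}.yml")
    result = subprocess.run(["conda", "env", "export"],
                            capture_output=True, text=True)
    if result.returncode == 0:
        environment_yml_path.write_text(result.stdout)
    else:
        print(f"conda env export failed: {result.stderr}")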
Example #6
def main():

    # Read the configuration
    segment_config_filepaths = [Path('../config/general.ini')]
    conf.read_config(segment_config_filepaths)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(conf.dirs.getpath('log_dir'), __name__)
    logger.info("Start")
    logger.info(f"Config used: \n{conf.pformat_config()}")

    logger.info(pprint.pformat(dict(os.environ)))

    # Init variables
    #parcels_filepath = r"X:\GIS\GIS DATA\Percelen_ALP\Vlaanderen\Perc_VL_2019_2019-07-28\perc_2019_met_k_2019-07-28.shp"
    #overlap_filepath = r"X:\Monitoring\OrthoSeg\sealedsurfaces\output_vector\sealedsurfaces_10\sealedsurfaces_10_orig.gpkg"
    input_preprocessed_dir = conf.dirs.getpath('input_preprocessed_dir')
    parcels_filepath = input_preprocessed_dir / 'Prc_BEFL_2019_2019-07-02_bufm5_32632.gpkg'
    overlap_filepath = input_preprocessed_dir / 'Prc_BEFL_2019_2019-07-02_bufm5_32632.gpkg'

    # Read parcels file to memory (isn't that large...)
    #parcels_gpd = geofile_helper.read_file(parcels_filepath)

    # Loop over parcels and calculate overlap
    logger.info(f"Connect to {overlap_filepath}")
    conn = sqlite3.connect(str(overlap_filepath))
    conn.enable_load_extension(True)

    #now we can load the extension
    # depending on your OS and sqlite/spatialite version you might need to add
    # '.so' (Linux) or '.dll' (Windows) to the extension name

    #mod_spatialite (recommended)
    #conn.execute("SELECT load_extension('spatialite.dll')")
    conn.load_extension('mod_spatialite')
    conn.execute('SELECT InitSpatialMetaData(1);')
    """
    # libspatialite
    conn.execute('SELECT load_extension("libspatialite")')
    conn.execute('SELECT InitSpatialMetaData();')
    """

    c = conn.cursor()

    c.execute("SELECT sqlite_version()")
    for row in c:
        logger.info(f"test: {row}")

    c.execute("select name from sqlite_master where type = 'table'")
    for row in c:
        logger.info(f"Table: {row}")

    c.execute(
        """SELECT t.uid, t.fid, MbrMinX(t.geom), ST_GeometryType(t.geom), ST_AsText(GeomFromGPB(t.geom))
                 FROM info t
                 JOIN rtree_info_geom r ON t.fid = r.id
                 WHERE r.minx >= 50000
                   AND r.maxx <= 51000
            """)
    """SELECT t.fid, ST_AsText(t.geom)
            FROM info t
            JOIN rtree_info_geom r ON t.fid = r.id
    """
    """SELECT t.fid, AsText(t.geom)
            FROM "default" t
            JOIN rtree_default_geom r ON t.fid = r.id
        WHERE r.minx <= 200000
            AND r.maxx >= 205000
            AND r.miny <= 200000
            AND r.maxy >= 201000
    """

    logger.info(f"test")
    for i, row in enumerate(c):
        logger.info(f"test: {row}")
        if i >= 10:
            break
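As the comments in this example note, the spatialite extension name that sqlite3 can load differs per OS and installation (mod_spatialite, sometimes with a .so or .dll suffix). A small helper sketch that simply tries the usual candidates:

# Sketch: try the usual spatialite module names until one loads; which one
# works depends on the OS and on how spatialite was installed.
import sqlite3

def load_spatialite(conn: sqlite3.Connection) -> None:
    conn.enable_load_extension(True)
    for name in ("mod_spatialite", "mod_spatialite.so", "mod_spatialite.dll"):
        try:
            conn.load_extension(name)
            return
        except sqlite3.OperationalError:
            continue
    raise RuntimeError("Could not load the spatialite extension")

# Usage, analogous to the example above:
# conn = sqlite3.connect("some_file.gpkg")
# load_spatialite(conn)
# conn.execute("SELECT InitSpatialMetaData(1);")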
Example #7
def run(markertype_to_calc: str,
        input_parcel_filename: str,
        input_parcel_filetype: str,
        country_code: str,
        year: int,
        input_groundtruth_filename: str,
        input_model_to_use_filepath: str):
    """
    Runs a marker for an input file. If no input model to use is specified,
    a new one will be trained.

    Args

    """

    # If a model to use is specified, check if it exists...
    if input_model_to_use_filepath is not None and not os.path.exists(input_model_to_use_filepath):
        raise Exception(f"Input file input_model_to_use_filepath doesn't exist: {input_model_to_use_filepath}")
    
    # Determine the config files to load depending on the marker_type
    marker_ini = f"config/{markertype_to_calc.lower()}.ini"
    config_filepaths = ["config/general.ini",
                        marker_ini,
                        "config/local_overrule.ini"]

    # Read the configuration files
    conf.read_config(config_filepaths, year=year)

    # Create run dir to be used for the results
    reuse_last_run_dir = conf.dirs.getboolean('reuse_last_run_dir')
    reuse_last_run_dir_config = conf.dirs.getboolean('reuse_last_run_dir_config')
    run_dir = dir_helper.create_run_dir(conf.dirs['marker_base_dir'], reuse_last_run_dir)
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    # Main initialisation of the logging
    logger = log_helper.main_log_init(run_dir, __name__)      
    logger.info(f"Run dir with reuse_last_run_dir: {reuse_last_run_dir}, {run_dir}")
    logger.info(f"Config used: \n{conf.pformat_config()}")

    # If the config needs to be reused as well, load it, else write it
    config_used_filepath = os.path.join(run_dir, 'config_used.ini')
    if (reuse_last_run_dir
            and reuse_last_run_dir_config
            and os.path.exists(run_dir)
            and os.path.exists(config_used_filepath)):
        config_filepaths.append(config_used_filepath)
        logger.info(f"Run dir config needs to be reused, so {config_filepaths}")
        conf.read_config(config_filepaths=config_filepaths, year=year)
        logger.info("Write new config_used.ini, because some parameters might have been added")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)
    else:
        logger.info("Write config_used.ini, so it can be reused later on")
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

    # Prepare input filepaths
    input_dir = conf.dirs['input_dir']    
    input_parcel_filepath = os.path.join(input_dir, input_parcel_filename)
    if input_groundtruth_filename is not None:
        input_groundtruth_filepath = os.path.join(input_dir, input_groundtruth_filename)
    else:
        input_groundtruth_filepath = None

    # Check if the necessary input files exist...
    if not os.path.exists(input_parcel_filepath):
        message = f"The parcel input file doesn't exist, so STOP: {input_parcel_filepath}"
        logger.critical(message)
        raise Exception(message)

    # Get some general config
    data_ext = conf.general['data_ext']
    output_ext = conf.general['output_ext']
    geofile_ext = conf.general['geofile_ext']
       
    #-------------------------------------------------------------
    # The real work
    #-------------------------------------------------------------
    # STEP 1: prepare parcel data for classification and image data extraction
    #-------------------------------------------------------------

    # Prepare the input data for optimal image data extraction:
    #    1) apply a negative buffer on the parcel to evade mixels
    #    2) remove features that became null because of buffer
    input_preprocessed_dir = conf.dirs['input_preprocessed_dir']
    input_parcel_filename_noext, _ = os.path.splitext(input_parcel_filename)
    buffer = conf.marker.getint('buffer')       
    input_parcel_nogeo_filepath = os.path.join(
            input_preprocessed_dir, f"{input_parcel_filename_noext}{data_ext}")
    imagedata_input_parcel_filename_noext = f"{input_parcel_filename_noext}_bufm{buffer}"
    imagedata_input_parcel_filepath = os.path.join(
            input_preprocessed_dir, f"{imagedata_input_parcel_filename_noext}{geofile_ext}")
    ts_util.prepare_input(
            input_parcel_filepath=input_parcel_filepath,
            output_imagedata_parcel_input_filepath=imagedata_input_parcel_filepath,
            output_parcel_nogeo_filepath=input_parcel_nogeo_filepath)

    # STEP 2: Get the timeseries data needed for the classification
    #-------------------------------------------------------------
    # Get the time series data (S1 and S2) to be used for the classification 
    # Result: data is put in files in timeseries_periodic_dir, in one file per 
    #         date/period
    timeseries_periodic_dir = conf.dirs['timeseries_periodic_dir']
    start_date_str = conf.marker['start_date_str']
    end_date_str = conf.marker['end_date_str']
    sensordata_to_use = conf.marker.getlist('sensordata_to_use')
    parceldata_aggregations_to_use = conf.marker.getlist('parceldata_aggregations_to_use')
    base_filename = f"{input_parcel_filename_noext}_bufm{buffer}_weekly"
    ts.calc_timeseries_data(
            input_parcel_filepath=imagedata_input_parcel_filepath,
            input_country_code=country_code,
            start_date_str=start_date_str,
            end_date_str=end_date_str,
            sensordata_to_get=sensordata_to_use,
            base_filename=base_filename,
            dest_data_dir=timeseries_periodic_dir)

    # STEP 3: Preprocess all data needed for the classification
    #-------------------------------------------------------------
    # Prepare the basic input file with the classes that will be classified to.
    # Remarks:
    #    - this is typically specific for the input dataset and result wanted!!!
    #    - the result is/should be a file with the following columns
    #           - id (=global_settings.id_column): unique ID for each parcel
    #           - classname (=global_settings.class_column): the class that must 
    #             be classified to.
    #             Remarks: - if in classes_to_ignore_for_train, class won't be used for training
    #                      - if in classes_to_ignore, the class will be ignored
    #           - pixcount (=global_settings.pixcount_s1s2_column):  
    #             the number of S1/S2 pixels in the parcel.
    #             Is -1 if the parcel doesn't have any S1/S2 data.
    classtype_to_prepare = conf.preprocess['classtype_to_prepare']
    parcel_filepath = os.path.join(
            run_dir, f"{input_parcel_filename_noext}_parcel{data_ext}")
    parcel_pixcount_filepath = os.path.join(
            timeseries_periodic_dir, f"{base_filename}_pixcount{data_ext}")
    class_pre.prepare_input(
            input_parcel_filepath=input_parcel_nogeo_filepath,
            input_parcel_filetype=input_parcel_filetype,
            input_parcel_pixcount_filepath=parcel_pixcount_filepath,
            classtype_to_prepare=classtype_to_prepare,
            output_parcel_filepath=parcel_filepath)

    # Collect all data needed to do the classification in one input file
    parcel_classification_data_filepath = os.path.join(
            run_dir, f"{base_filename}_parcel_classdata{data_ext}")
    ts.collect_and_prepare_timeseries_data(
            input_parcel_filepath=input_parcel_nogeo_filepath,
            timeseries_dir=timeseries_periodic_dir,
            base_filename=base_filename,
            output_filepath=parcel_classification_data_filepath,
            start_date_str=start_date_str,
            end_date_str=end_date_str,
            sensordata_to_use=sensordata_to_use,
            parceldata_aggregations_to_use=parceldata_aggregations_to_use)

    # STEP 4: Train and test if necessary... and predict
    #-------------------------------------------------------------
    parcel_predictions_proba_all_filepath = os.path.join(
            run_dir, f"{base_filename}_predict_proba_all{data_ext}")

    # if there is no model to use specified, train one!
    if input_model_to_use_filepath is None:

        # Create the training sample...
        # Remark: this creates a list of representative test parcel + a list of (candidate) training parcel
        balancing_strategy = conf.marker['balancing_strategy']
        parcel_train_filepath = os.path.join(run_dir, 
                f"{base_filename}_parcel_train{data_ext}")
        parcel_test_filepath = os.path.join(
                run_dir, f"{base_filename}_parcel_test{data_ext}")
        class_pre.create_train_test_sample(
                input_parcel_filepath=parcel_filepath,
                output_parcel_train_filepath=parcel_train_filepath,
                output_parcel_test_filepath=parcel_test_filepath,
                balancing_strategy=balancing_strategy)

        # Train the classifier and output predictions
        classifier_ext = conf.classifier['classifier_ext']
        classifier_filepath = os.path.splitext(parcel_train_filepath)[0] + f"_classifier{classifier_ext}"
        parcel_predictions_proba_test_filepath = os.path.join(
                run_dir, f"{base_filename}_predict_proba_test{data_ext}")
        classification.train_test_predict(
                input_parcel_train_filepath=parcel_train_filepath,
                input_parcel_test_filepath=parcel_test_filepath,
                input_parcel_all_filepath=parcel_filepath,
                input_parcel_classification_data_filepath=parcel_classification_data_filepath,
                output_classifier_filepath=classifier_filepath,
                output_predictions_test_filepath=parcel_predictions_proba_test_filepath,
                output_predictions_all_filepath=parcel_predictions_proba_all_filepath)
    else:
        # there is a classifier specified, so just use it!
        classification.predict(
                input_parcel_filepath=parcel_filepath,
                input_parcel_classification_data_filepath=parcel_classification_data_filepath,
                input_classifier_filepath=input_model_to_use_filepath,
                output_predictions_filepath=parcel_predictions_proba_all_filepath)

    # STEP 5: if necessary, do extra postprocessing
    #-------------------------------------------------------------    
    '''if postprocess_to_groups is not None:
        # TODO 
    '''

    # STEP 6: do the default, mandatory postprocessing
    #-------------------------------------------------------------
    # If it was necessary to train, there will be a test prediction... so postprocess it
    if input_model_to_use_filepath is None:
        parcel_predictions_test_filepath = os.path.join(
                run_dir, f"{base_filename}_predict_test{data_ext}")
        class_post.calc_top3_and_consolidation(
                input_parcel_filepath=parcel_test_filepath,
                input_parcel_probabilities_filepath=parcel_predictions_proba_test_filepath,
                output_predictions_filepath=parcel_predictions_test_filepath)
        
    # Postprocess predictions
    parcel_predictions_all_filepath = os.path.join(
            run_dir, f"{base_filename}_predict_all{data_ext}")
    parcel_predictions_all_output_filepath = os.path.join(
            run_dir, f"{base_filename}_predict_all_output{output_ext}")
    class_post.calc_top3_and_consolidation(
            input_parcel_filepath=parcel_filepath,
            input_parcel_probabilities_filepath=parcel_predictions_proba_all_filepath,
            output_predictions_filepath=parcel_predictions_all_filepath,
            output_predictions_output_filepath=parcel_predictions_all_output_filepath)

    # STEP 7: Report on the accuracy, incl. ground truth
    #-------------------------------------------------------------
    # Preprocess the ground truth data if it is provided
    groundtruth_filepath = None
    if input_groundtruth_filepath is not None:
        _, input_gt_filename = os.path.split(input_groundtruth_filepath)
        input_gt_filename_noext, input_gt_filename_ext = os.path.splitext(input_gt_filename)
        groundtruth_filepath = os.path.join(
                run_dir, f"{input_gt_filename_noext}_classes{input_gt_filename_ext}")
        class_pre.prepare_input(
                input_parcel_filepath=input_groundtruth_filepath,
                input_parcel_filetype=input_parcel_filetype,
                input_parcel_pixcount_filepath=parcel_pixcount_filepath,
                classtype_to_prepare=conf.preprocess['classtype_to_prepare_groundtruth'],
                output_parcel_filepath=groundtruth_filepath)

    # If we trained a model, there is a test prediction we want to report on
    if input_model_to_use_filepath is None:
        # Print full reporting on the accuracy of the test dataset
        report_txt = f"{parcel_predictions_test_filepath}_accuracy_report.txt"
        class_report.write_full_report(
                parcel_predictions_filepath=parcel_predictions_test_filepath,
                output_report_txt=report_txt,
                parcel_ground_truth_filepath=groundtruth_filepath)

    # Print full reporting on the accuracy of the full dataset
    report_txt = f"{parcel_predictions_all_filepath}_accuracy_report.txt"
    class_report.write_full_report(
            parcel_predictions_filepath=parcel_predictions_all_filepath,
            output_report_txt=report_txt,
            parcel_ground_truth_filepath=groundtruth_filepath)

    logging.shutdown()
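A hypothetical call of run() for a single marker; the marker type and file names below are invented for illustration and would have to match existing config and input files:

# Hypothetical usage sketch of run(); all values are invented for illustration.
run(markertype_to_calc="CROPGROUP",
    input_parcel_filename="Prc_BEFL_2018.shp",
    input_parcel_filetype="BEFL",
    country_code="BEFL",
    year=2018,
    input_groundtruth_filename=None,
    input_model_to_use_filepath=None)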
    
Example #8
def main():

    # Determine the config files to load depending on the marker_type
    config_filepaths = [
        "../config/general.ini", "../config/local_overrule_linux.ini"
    ]

    test = False

    # Specify the date range:
    years = [2018, 2019]
    month_start = 3
    month_stop = 8
    for year in years:

        # Read the configuration files
        conf.read_config(config_filepaths, year=year)

        # Get the general output dir
        input_preprocessed_dir = conf.dirs['input_preprocessed_dir']
        timeseries_per_image_dir = conf.dirs['timeseries_per_image_dir']

        # Init logging
        if not test:
            base_log_dir = conf.dirs['log_dir']
        else:
            base_log_dir = conf.dirs['log_dir'] + '_test'
        log_dir = f"{base_log_dir}{os.sep}calc_dias_{datetime.now():%Y-%m-%d_%H-%M-%S}"

        # Clean test log dir if it exist
        if test and os.path.exists(base_log_dir):
            shutil.rmtree(base_log_dir)

        global logger
        logger = log_helper.main_log_init(log_dir, __name__)
        logger.info(f"Config used: \n{conf.pformat_config()}")

        if test:
            logger.info(
                f"As we are testing, clean all test logging and use new log_dir: {log_dir}"
            )

        # Write the consolidated config as ini file again to the run dir
        config_used_filepath = os.path.join(log_dir, 'config_used.ini')
        with open(config_used_filepath, 'w') as config_used_file:
            conf.config.write(config_used_file)

        # Input features file depends on the year
        if year == 2017:
            input_features_filename = "Prc_BEFL_2017_2019-06-14_bufm5.shp"
        elif year == 2018:
            input_features_filename = "Prc_BEFL_2018_2019-06-14_bufm5.shp"
        elif year == 2019:
            #input_features_filename = "Prc_BEFL_2019_2019-06-25_bufm5.shp"
            input_features_filename = "Prc_BEFL_2019_2019-07-02_bufm5.shp"
        else:
            raise Exception(f"Not a valid year: {year}")
        input_features_filepath = os.path.join(input_preprocessed_dir,
                                               input_features_filename)

        # Init output dir
        if not test:
            output_basedir = timeseries_per_image_dir
        else:
            output_basedir = timeseries_per_image_dir + '_test'
            logger.info(
                f"As we are testing, use test output basedir: {output_basedir}"
            )
        input_features_filename_noext = os.path.splitext(
            input_features_filename)[0]
        output_dir = os.path.join(output_basedir,
                                  input_features_filename_noext)
        if test:
            if os.path.exists(output_dir):
                logger.info(
                    f"As we are only testing, clean the output dir: {output_dir}"
                )
                # Remove the test output dir, including its contents
                shutil.rmtree(output_dir)

        # Temp dir + clean contents from it.
        temp_dir = conf.dirs['temp_dir'] + os.sep + 'calc_dias'
        logger.info(f"Clean the temp dir {temp_dir}")
        if os.path.exists(temp_dir):
            # Remove the temp dir, including its contents
            shutil.rmtree(temp_dir)
        """
        # TEST to extract exact footprint from S1 image...
        filepath = "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2017/01/01/S1A_IW_GRDH_1SDV_20170101T055005_20170101T055030_014634_017CB9_Orb_RBN_RTN_Cal_TC.CARD/S1A_IW_GRDH_1SDV_20170101T055005_20170101T055030_014634_017CB9_Orb_RBN_RTN_Cal_TC.data/Gamma0_VH.img"
        image = rasterio.open(filepath)
        geoms = list(rasterio.features.dataset_features(src=image, as_mask=True, precision=5))
        footprint = gpd.GeoDataFrame.from_features(geoms)        
        logger.info(footprint)
        footprint = footprint.simplify(0.00001)        
        logger.info(footprint)
        logger.info("Ready")
        # Start calculation
        """

        ##### Process S1 GRD images #####
        input_image_filepaths = []
        for i in range(month_start, month_stop + 1):
            input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S1*/L1TC/{year}/{i:02d}/*/*.CARD"
            input_image_filepaths.extend(glob.glob(input_image_searchstr))
        logger.info(
            f"Found {len(input_image_filepaths)} S1 GRD images to process")

        if test:
            # Take only the x first images found while testing

            #input_image_filepaths = input_image_filepaths[:10]
            input_image_filepaths = []
            input_image_filepaths.append(
                "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2018/04/09/S1A_IW_GRDH_1SDV_20180409T054153_20180409T054218_021386_024D13_D824_Orb_RBN_RTN_Cal_TC_20190612T171437.L1TC.CARD"
            )
            input_image_filepaths.append(
                "/mnt/NAS3/CARD/FLANDERS/S1A/L1TC/2018/04/22/S1A_IW_GRDH_1SDV_20180422T173236_20180422T173301_021583_025328_99D1_Orb_RBN_RTN_Cal_TC_20190612T171441.L1TC.CARD"
            )

            logger.info(
                f"As we are only testing, process only {len(input_image_filepaths)} test images"
            )

        calc_ts.calc_stats_per_image(features_filepath=input_features_filepath,
                                     id_column=conf.columns['id'],
                                     image_paths=input_image_filepaths,
                                     bands=['VV', 'VH'],
                                     output_dir=output_dir,
                                     temp_dir=temp_dir,
                                     log_dir=log_dir)

        ##### Process S2 images #####
        input_image_filepaths = []
        for i in range(month_start, month_stop + 1):
            input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S2*/L2A/{year}/{i:02d}/*/*.SAFE"
            input_image_filepaths.extend(glob.glob(input_image_searchstr))
        logger.info(f"Found {len(input_image_filepaths)} S2 images to process")

        if test:
            # Take only the x first images found while testing
            input_image_filepaths = input_image_filepaths[:10]
            logger.info(
                f"As we are only testing, process only {len(input_image_filepaths)} test images"
            )

        # TODO: refactor underlying code so the SCL band is used regardless of it being passed here
        max_cloudcover_pct = conf.timeseries.getfloat('max_cloudcover_pct')
        calc_ts.calc_stats_per_image(
            features_filepath=input_features_filepath,
            id_column=conf.columns['id'],
            image_paths=input_image_filepaths,
            bands=['B02-10m', 'B03-10m', 'B04-10m', 'B08-10m', 'SCL-20m'],
            output_dir=output_dir,
            temp_dir=temp_dir,
            log_dir=log_dir,
            max_cloudcover_pct=max_cloudcover_pct)

        ##### Process S1 Coherence images #####
        input_image_filepaths = []
        for i in range(month_start, month_stop + 1):
            input_image_searchstr = f"/mnt/NAS3/CARD/FLANDERS/S1*/L1CO/{year}/{i:02d}/*/*.CARD"
            input_image_filepaths.extend(glob.glob(input_image_searchstr))
        logger.info(
            f"Found {len(input_image_filepaths)} S1 Coherence images to process"
        )

        if test:
            # Take only the x first images found while testing
            input_image_filepaths = input_image_filepaths[:10]
            logger.info(
                f"As we are only testing, process only {len(input_image_filepaths)} test images"
            )

        calc_ts.calc_stats_per_image(features_filepath=input_features_filepath,
                                     id_column=conf.columns['id'],
                                     image_paths=input_image_filepaths,
                                     bands=['VV', 'VH'],
                                     output_dir=output_dir,
                                     temp_dir=temp_dir,
                                     log_dir=log_dir)
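The S1 GRD, S2 and S1 coherence searches above repeat the same month-by-month glob loop; a refactor sketch that factors it into one helper (the path layout in the example pattern is reused):

# Refactor sketch: one helper for the repeated month-by-month image searches.
import glob

def find_images(pattern_template: str, year: int, month_start: int, month_stop: int) -> list:
    """Collect image paths for every month in [month_start, month_stop]."""
    image_paths = []
    for month in range(month_start, month_stop + 1):
        image_paths.extend(glob.glob(pattern_template.format(year=year, month=month)))
    return sorted(image_paths)

# e.g. for the S1 GRD images above:
# find_images("/mnt/NAS3/CARD/FLANDERS/S1*/L1TC/{year}/{month:02d}/*/*.CARD", 2018, 3, 8)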