Esempio n. 1
0
def gdal_8bit_rescale(infile, outfile, overwrite=False):
    """
    Convert a raster to an 8 bit (Byte) GeoTIFF through gdal.Translate.
    :param infile: Path
        Raster to rescale
    :param outfile: Path
        Destination of the rescaled raster
    :param overwrite: Bool
        When False, an output file that already exists is left untouched
    :return: str or None
        Error message when the output file is missing after translation, None otherwise
    """
    output_present = validate_file_exists(outfile)
    if output_present and not overwrite:
        logging.warning(
            f"8Bit file already exists: {outfile.name}. Will not overwrite")
        return None

    # gdal_translate style switches: Byte output type, GeoTIFF driver, rescaling enabled.
    translate_options = " ".join(('-ot Byte', '-of GTiff', '-scale'))
    gdal.Translate(str(outfile), str(infile), options=translate_options)

    # Success is judged by the presence of the output file.
    if validate_file_exists(outfile):
        return None

    failure = f"ERROR: Could not scale {str(outfile)}"
    logging.error(failure)
    return failure
Esempio n. 2
0
def gdal_8bit_rescale(tile_info: TileInfo, overwrite=False):
    """
    Rescale to 8 bit the input image. Uses gdal_translate.
    The output is written in the tile's prep folder and named after the last processed file,
    with a trailing "_<dtype>" marker (if any) swapped for "_uint8".
    :param tile_info: TileInfo
        Tile whose last processed file (last_processed_fp) is scaled
    :param overwrite: Bool
        Overwrite if output file already exist
    :return: tuple (Path, str or None)
        Scaled raster file name and an error message (None when no error was detected)
    """
    error = None
    infile = tile_info.last_processed_fp

    # Only the trailing dtype marker is swapped; the same text elsewhere in the stem is kept.
    stem = str(infile.stem)
    dtype_marker = f"_{tile_info.dtype}"
    if stem.endswith(dtype_marker):
        stem = stem[:-len(dtype_marker)]
    outfile_name = f"{stem}_uint8.tif"
    outfile = tile_info.parent_folder / tile_info.image_folder / tile_info.prep_folder / outfile_name

    if validate_file_exists(outfile) and not overwrite:
        logging.warning(f"8Bit file already exists: {outfile.name}. Will not overwrite")
        return Path(outfile), error

    options_string = " ".join(['-ot Byte', '-of GTiff', '-scale'])
    try:
        scaled_ds = gdal.Translate(str(outfile), str(infile), options=options_string)
    except Exception as exc:  # GDAL raises only when exceptions are enabled
        error = f"ERROR: Could not scale {str(outfile)}"
        logging.error(f"{error} ({exc})")
    else:
        # Without gdal.UseExceptions(), a failed translation is signalled by a None return.
        if scaled_ds is None:
            error = f"ERROR: Could not scale {str(outfile)}"
            logging.error(error)
        scaled_ds = None  # drop the dataset reference so GDAL flushes and closes the file

    return Path(outfile), error
Esempio n. 3
0
def rasterio_merge_tiles(tile_list, outfile, overwrite: bool = False):
    """
    Merge in a single tif file, multiples tifs from a list.
    :param tile_list: list
        List of Path to images to merge.
    :param outfile: Path
        Path to merged image
    :param overwrite: bool
        When False, an already existing merged file is left untouched
    :return: tuple (Path, error)
        Merged raster file name and the error encountered: None on success, the RasterioIOError
        raised while opening a tile, or a message when nothing could be merged / written.
    """
    from contextlib import ExitStack

    error = None

    if validate_file_exists(outfile) and not overwrite:
        logging.warning(
            f"Merge file already exists: {outfile.name}. Will not overwrite")
        return Path(outfile), error

    # ExitStack closes every tile that was opened, including when a later open fails.
    with ExitStack() as stack:
        try:
            # Open all tiles.
            sources = [stack.enter_context(rasterio.open(raster)) for raster in tile_list]
        except rasterio.errors.RasterioIOError as err:
            logging.error(err)
            return Path(outfile), err

        if not sources:
            # Nothing to merge: report it instead of failing on sources[0] below.
            error = f"Could not merge image {outfile.stem}: tile list is empty"
            logging.error(error)
            return Path(outfile), error

        # Merge
        mosaic, out_trans = merge(sources)
        # Copy the metadata of the first tile and adapt it to the mosaic
        out_meta = sources[0].meta.copy()
        out_meta.update({
            "driver": "GTiff",
            "height": mosaic.shape[1],
            "width": mosaic.shape[2],
            "transform": out_trans
        })
        # Write merged image
        with rasterio.open(outfile, "w", **out_meta) as dest:
            dest.write(mosaic)

    if not validate_file_exists(outfile):
        error = f"Could not merge image {outfile.stem}"
        logging.error(error)

    return Path(outfile), error
Esempio n. 4
0
def gdal_split_band(im_name, img_file, xml_file, overwrite: bool = False):
    """
    Split multi band file into single band files.
    Each band is written next to img_file as "<im_name>_<img_file stem>_<band>.tif".
    :param im_name:
        Prefix placed at the start of every output file name
    :param img_file: Path
        Multi band image to split
    :param xml_file: Path
        Path to xml from which the band order is read
    :param overwrite: bool
        Overwrite files if they already exists.
    :return: tuple (list of Path, list)
        Output files that were processed (existing files skipped because overwrite is False are
        not listed) and the list of errors encountered.
    """
    list_band_order, err = get_band_order(str(xml_file))
    error = []
    list_band_file = []

    if err is not None:
        error.append(err)
        logging.error(err)
        return list_band_file, error

    # Band numbers passed to "-b" are 1-based and follow the order of the list.
    for band_num, elem in enumerate(list_band_order, start=1):
        out_filename = Path(f"{im_name}_{img_file.stem}_{elem}.tif")
        out_filepath = img_file.parent / out_filename

        if validate_file_exists(out_filepath) and not overwrite:
            logging.warning(
                f"{elem} file already exists: {out_filepath.name}. Will not overwrite"
            )
            continue

        options_string = " ".join(['-of GTiff', f"-b {band_num}"])
        gdal.Translate(str(out_filepath),
                       str(img_file),
                       options=options_string)

        if not validate_file_exists(out_filepath):
            err = f"Could not write singleband image {str(out_filepath)}"
            error.append(err)
            logging.error(err)
        # The path is listed even when writing failed; the failure is reported through `error`.
        list_band_file.append(out_filepath)

    return list_band_file, error
Esempio n. 5
0
 def _check_repost_unsent_values(self):
     """Re-submit the records kept in the failed-writes file, if that file exists.

     Each record yielded by ``_load_locals()`` goes to ``_write_remote()`` while
     ``_connection_ok`` is truthy, otherwise back to ``_write_local()``. The
     failed-writes file is removed only when no record had to be written locally.
     """
     if not utils.validate_file_exists(_DB_FAILED_WRITES):
         return

     utils.v_print('Had values which were not successfully sent.')

     kept_locally = False
     for record in self._load_locals():
         if not self._connection_ok:
             # Still offline: keep the record for a later attempt.
             kept_locally = True
             self._write_local(record)
             continue
         utils.v_print(f"ReSending dropped data: {record}")
         self._write_remote(record)

     if kept_locally:
         return

     print('Successfully pushed, all previously failed db writes, to server.')
     os.remove(_DB_FAILED_WRITES)
Esempio n. 6
0
    def __init__(self):
        """Load (or create on first run) the sensor configuration and apply it.

        When the config file exists it is read as JSON. Otherwise the file location
        is checked for writability, the sensor is configured with the defaults and
        ``first_time_setup`` supplies the configuration. The resulting config is then
        applied to the sensor and copied into the instance properties.
        """
        # Configure
        if not utils.validate_file_exists(_CONFIG_FILE_NAME):
            utils.validate_can_write_file(_CONFIG_FILE_NAME,
                                          should_del_after=True)
            # Defaults are applied before the first time setup runs.
            self._configure_sensor(_DEFAULT_SENSOR_CONFIG)
            config = self.first_time_setup(_DEFAULT_SENSOR_CONFIG)
        else:
            config = utils.get_json_from_file(_CONFIG_FILE_NAME)
        self._configure_sensor(config)

        # Populate properties based on config
        self._data = utils.DataCapture()

        humidity_cfg = config['humidity']
        self.humidity_baseline = humidity_cfg['baseline']
        self.humidity_gas_quality_ratio = humidity_cfg['quality_weighting']

        self.gas_baseline = config['gas']['ambient_background']

        self.__cpu = config['cpu']
        self.__cpu['smoothing'] = []
Esempio n. 7
0
def rasterio_merge_tiles(image_info: ImageInfo,
                         overwrite: bool = False):
    """
    Merge in a single tif file, multiples tifs from a list.
    The merged file is written in the image's prep folder; its name is the stem of the first tile
    with the "R?C?" tile marker replaced by "Merge".
    :param image_info: ImageInfo
        Image whose tile_list is merged
    :param overwrite: bool
        When False, an already existing merged file is left untouched
    :return: tuple (Path, str or None)
        Merged raster file name and an error message (None when merging succeeded or was skipped)
    """
    from contextlib import ExitStack

    error = None
    tile_marker = re.compile(r'R\wC\w')
    outfile_name = tile_marker.sub('Merge', str(image_info.tile_list[0].stem)) + ".tif"
    outfile = Path(image_info.parent_folder / image_info.image_folder / image_info.prep_folder) / outfile_name

    if validate_file_exists(outfile) and not overwrite:
        logging.warning(f"Merge file already exists: {outfile.name}. Will not overwrite")
        return Path(outfile), error

    try:
        # ExitStack closes every opened tile, whether merging succeeds or fails.
        with ExitStack() as stack:
            # Open all tiles.
            sources = [stack.enter_context(rasterio.open(raster)) for raster in image_info.tile_list]

            # Merge
            mosaic, out_trans = merge(sources)
            # Copy the metadata of the first tile and adapt it to the mosaic
            out_meta = sources[0].meta.copy()
            out_meta.update({"driver": "GTiff",
                             "height": mosaic.shape[1],
                             "width": mosaic.shape[2],
                             "transform": out_trans})
            # Write merged image
            with rasterio.open(outfile, "w", **out_meta) as dest:
                dest.write(mosaic)
    except Exception as exc:
        # Failures are reported through the returned error instead of being raised.
        error = f"Could not merge image {image_info.image_folder}"
        logging.error(f"{error} ({exc})")

    return Path(outfile), error
Esempio n. 8
0
def gdal_split_band(image: ImageInfo,
                    overwrite: bool = False):
    """
    Split multi band file into single band files.
    Each band of image.merge_img_fp is written in the image's prep folder as
    "<merged image stem>_<band>.tif", following the band order read from image.mul_xml.
    :param image: ImageInfo
        Image
    :param overwrite: bool
        Overwrite files if they already exists.
    :return: tuple (list of Path, list of str)
        Output files that were processed (existing files skipped because overwrite is False are
        not listed) and the list of errors encountered.
    """
    list_band_order, err = get_band_order(str(image.mul_xml))
    error = []
    infile = image.merge_img_fp
    list_band_file = []

    if err is not None:
        error.append(err)
        return list_band_file, error

    # Band numbers passed to "-b" are 1-based and follow the order of the list.
    for band_num, elem in enumerate(list_band_order, start=1):
        out_filename = f"{image.merge_img_fp.stem}_{elem}.tif"
        out_filepath = image.parent_folder / image.image_folder / image.prep_folder / Path(out_filename)

        if validate_file_exists(out_filepath) and not overwrite:
            logging.warning(f"{elem} file already exists: {out_filepath.name}. Will not overwrite")
            continue

        options_string = " ".join(['-of GTiff', f"-b {band_num}"])
        try:
            band_ds = gdal.Translate(str(out_filepath), str(infile), options=options_string)
        except Exception as exc:  # GDAL raises only when exceptions are enabled
            band_err = f"Could not write singleband image {str(out_filepath)}"
            error.append(band_err)
            logging.error(f"{band_err} ({exc})")
        else:
            # Without gdal.UseExceptions(), a failed translation is signalled by a None return.
            if band_ds is None:
                band_err = f"Could not write singleband image {str(out_filepath)}"
                error.append(band_err)
                logging.error(band_err)
            band_ds = None  # drop the dataset reference so GDAL flushes and closes the file
        # The path is listed even when writing failed; the failure is reported through `error`.
        list_band_file.append(out_filepath)

    return list_band_file, error
Esempio n. 9
0
        #
        # del a_ds
        assessment_splitter(assessment_data, destination_filename, a_buckets, i)

    print('done!')

# Command line entry point: parse the arguments, validate them, then run split_data().
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--version', action='version', version='v0.1.9')
    parser.add_argument('-p', '--patient_data',
                        help='the location and name of the patient data csv file')
    parser.add_argument('-a', '--assessment_data',
                        help='the location and name of the assessment data csv file')
    parser.add_argument('-b', '--bucket_size', type=int, default=500000,
                        help='the number of patients to include in a bucket')

    args = parser.parse_args()
    # Buckets smaller than 10000 patients are rejected before any file is touched.
    if args.bucket_size < 10000:
        print('--bucket_size cannot be less than 10000')
        exit(-1)

    # The return values are not used here; presumably the helper raises or exits
    # when a file is missing -- TODO confirm against utils.validate_file_exists.
    # NOTE(review): neither path argument is required, so either may be None here.
    utils.validate_file_exists(args.patient_data)
    utils.validate_file_exists(args.assessment_data)

    # Any exception raised while splitting is printed and turned into exit status -1.
    try:
        split_data(args.patient_data, args.assessment_data, args.bucket_size)
    except Exception as e:
        print(e)
        exit(-1)
Esempio n. 10
0
def tile_list_glob(base_dir: str,
                   mul_pan_glob: List[str] = [],
                   mul_pan_str: List[str] = [],
                   psh_glob: List[str] = [],
                   extensions: List[str] = []):
    """
    Glob through specified directories for (1) pairs of multispectral and panchromatic rasters or (2) pansharp rasters.
    Return what was found as a list of ImageInfo.

    Side effects: the working directory is changed to base_dir, logging is configured to write to
    "logs/prep_glob.log" under it, and an output "*_PREP" folder is created for every image kept.
    The list defaults are never mutated by this function.
    :param base_dir: str
        Base directory where globbing will occur.
    :param mul_pan_glob: list of str
        List of list of patterns linking multispectral and panchrom. rasters. Patterns are a two-item list:
        (1) glob pattern to reach multispectral xml (excluding extension);
        (2) pattern to panchrom. folder from multispectral folder,
        e.g.: ["**/*_MUL/*-M*_P00?", "../*_PAN"].
    :param mul_pan_str: list of str
        List of list of string sections that identify multispectral and panchrom. rasters inside filename,
        e.g. [['-M', '-P'],["_MSI", "_PAN"]]. Must have as many items as mul_pan_glob.
    :param psh_glob: list of str
        List of glob patterns (excluding extension) to find already pansharped images.
    :param extensions: list of str
        List of extensions (suffixes) appended to the glob patterns, e.g. ["tif", "ntf"].
    :return:
        list of ImageInfo, one per multispectral/panchromatic pair followed by one per pansharp image found.
    :raises ValueError: if the output folder of a multispectral/panchromatic pair could not be created.
    """
    assert len(mul_pan_glob) == len(
        mul_pan_str
    ), "Missing info about multispectral and panchromatic images"

    # Reorganize mul/pan glob and str info as list of lists each containing a tuple.
    # e.g. [('Sherbrooke/**/*_MUL/*-M*_P00?', '../*_PAN'), ('-M', '-P')].
    # The two lists are walked in lockstep; indexing one with its own items raises TypeError.
    mul_pan_info_list = [[tuple(glob_pair), tuple(str_pair)]
                         for glob_pair, str_pair in zip(mul_pan_glob, mul_pan_str)]

    os.chdir(base_dir)  # Work in base directory

    import logging.config
    out_log_path = Path("./logs")
    out_log_path.mkdir(exist_ok=True)
    logging.basicConfig(filename='logs/prep_glob.log', level=logging.DEBUG)
    logging.info("Started")

    # Resolved path is useful in section 2 (search for panchromatic).
    # NOTE(review): resolve() runs after chdir(); a relative base_dir would resolve against the
    # new working directory -- confirm callers always pass an absolute path.
    base_dir_res = Path(base_dir).resolve()

    glob_output_list = []

    # Compiled once: turns the multispectral folder marker ("_M" + 2 chars) into "_PREP".
    mul_folder_marker = re.compile(r'_M\w\w')

    # 1. GLOB to all multispectral images in base directory using inputted pattern. Create generator from glob search.
    ################################################################################
    for mul_pan_info, ext in product(mul_pan_info_list, extensions):
        mul_glob_pattern = mul_pan_info[0][0] + "." + ext
        # FIXME: there may be compatibilty issues with glob's case sensitivity in Linux. Working ok on Windows.
        # More info: https://jdhao.github.io/2019/06/24/python_glob_case_sensitivity/
        mul_glob = base_dir_res.glob(mul_glob_pattern)

        # Loop through glob generator object and retrieve xml in multispectral folder
        for mul_xml in tqdm(mul_glob,
                            desc='Iterating through multispectral xml'):  # mul_xml being a Path object
            mul_rel = Path(mul_xml).relative_to(base_dir_res)  # Use only relative paths from here

            # The image folder is two levels above the xml; mul_rel is then made relative to it.
            image_folder = mul_rel.parents[1]
            mul_rel = Path(mul_xml).relative_to(base_dir_res / image_folder)

            # Messages are accumulated per image; this function neither returns nor logs the list itself.
            err_mgs = []
            length_err = "Check absolute path length. May exceed 260 characters."
            if not validate_file_exists(image_folder / mul_rel):
                err_mgs.append(length_err)

            # get tile list from xml
            lst_mul_tiles = get_tiles_from_xml(mul_xml)

            # 2. Find panchromatic image with relative glob pattern from multispectral pattern
            ################################################################################
            pan_glob_pattern = mul_pan_info[0][1] + "/*." + ext
            # assume panchromatic file has same extension as multispectral
            pan_glob = sorted(
                (image_folder / mul_rel.parent).glob(pan_glob_pattern))
            if len(pan_glob) == 0:
                missing_pan = f"The provided glob pattern {pan_glob_pattern} could not locate a potential" \
                              f"panchromatic raster to match {mul_rel} in image folder {image_folder}."
                logging.warning(missing_pan)
                err_mgs.append(missing_pan)
                continue
            # Replace string that identifies the raster as a multispectral for one identifying panchromatic raster
            pan_best_guess = str(mul_rel.name).replace(mul_pan_info[1][0],
                                                       mul_pan_info[1][1])
            # Guess the panchromatic image's path using directory from glob results above. This file may not exist.
            pan_best_guess_rel_path = (pan_glob[0].parent.resolve() /
                                       pan_best_guess).relative_to(
                                           base_dir_res / image_folder)
            # Make a list of strings from paths given by glob results above.
            pan_str = []
            for potential_pan in pan_glob:
                # Resolve paths to avoid path length problems in Windows,
                # i.e. discard all relative references (ex.: "mul_dir/../pan_dir") making path longer
                pot_pan_dir = potential_pan.parent.resolve()
                pot_pan_rel = pot_pan_dir.joinpath(
                    potential_pan.name).relative_to(base_dir_res /
                                                    image_folder)
                pan_str.append(str(pot_pan_rel))
            # Get closest match between guessed name for panchromatic image and glob file names.
            # get_close_matches() may return nothing; that case is treated like a missing panchromatic xml.
            pan_matches = get_close_matches(str(pan_best_guess_rel_path), pan_str)
            if not pan_matches:
                no_panchro_err = f"Panchromatic xml not found to match multispectral xml {mul_rel}"
                logging.warning(no_panchro_err)
                err_mgs.append(no_panchro_err)
                continue
            pan_rel = Path(pan_matches[0])
            if validate_file_exists(image_folder / pan_rel):
                lst_pan_tiles = get_tiles_from_xml(image_folder / pan_rel)
            else:
                no_panchro_err = f"Panchromatic xml not found to match multispectral xml {mul_rel}"
                logging.warning(no_panchro_err)
                err_mgs.append(no_panchro_err)
                continue

            # Check both mul and pan lists are the same length.
            if len(lst_mul_tiles) != len(lst_pan_tiles):
                xml_err = f"The number of tiles in multispectral and panchromatic xmls do not match for image {image_folder}."
                logging.warning(xml_err)
                err_mgs.append(xml_err)
                continue

            process_steps = ['psh']
            if len(lst_mul_tiles) > 1:
                process_steps.append('merge')
            elif len(lst_mul_tiles) == 0:
                xml_err = f"Could not find any tile in xmls for image {image_folder}."
                logging.warning(xml_err)
                err_mgs.append(xml_err)
                # Without a tile there is no raster to read the dtype from: skip this image.
                continue

            try:
                with rasterio_raster_reader(
                        str(mul_xml.parent / Path(lst_mul_tiles[0]))
                ) as src:  # Set output dtype as original multispectral dtype
                    dtype = src.meta["dtype"]
            except rasterio.errors.RasterioIOError as e:
                logging.warning(e)
                continue

            logging.debug(f"\nMultispectral: {mul_rel}\n"
                          f"Panchromatic: {pan_rel}\n"
                          f"Multispectral datatype: {dtype}\n")

            # Determine output path
            output_path = Path(mul_folder_marker.sub('_PREP', str(mul_rel.parent)))
            output_prep_path = Path(base_dir) / image_folder / output_path
            output_prep_path.mkdir(exist_ok=True)
            if not output_prep_path.is_dir():
                raise ValueError(f"Could not create folder {output_prep_path}")

            if dtype != 'uint8':
                process_steps.append('scale')

            mul_tile_list = [
                Path(base_dir) / image_folder / mul_rel.parent / Path(elem)
                for elem in lst_mul_tiles
            ]
            pan_tile_list = [
                Path(base_dir) / image_folder / pan_rel.parent / Path(elem)
                for elem in lst_pan_tiles
            ]

            im_name = get_img_name_from_img_folder(
                str(image_folder).split('/')[0])

            # create new record and append to existing records in glob_output_list.
            img_info = ImageInfo(parent_folder=Path(base_dir),
                                 image_folder=image_folder,
                                 im_name=im_name,
                                 prep_folder=output_path,
                                 mul_tile_list=mul_tile_list,
                                 pan_tile_list=pan_tile_list,
                                 mul_xml=mul_rel,
                                 pan_xml=pan_rel,
                                 mul_pan_info=mul_pan_info,
                                 process_steps=process_steps,
                                 dtype=dtype)

            glob_output_list.append(img_info)

    mul_pan_pairs_ct = len(glob_output_list)
    logging.info(
        f"Found {mul_pan_pairs_ct} pair(s) of multispectral and panchromatic rasters with provided parameters"
    )

    # 3. Find already pansharped images with a certain name pattern
    ################################################################################
    if psh_glob:  # if config file contains any search pattern, glob.
        for psh_glob_item, ext in product(psh_glob, extensions):
            psh_glob_pattern = psh_glob_item + "." + ext
            psh_xml_glob = base_dir_res.glob(psh_glob_pattern)
            for psh_xml in tqdm(
                    psh_xml_glob,
                    desc="Iterating through already pansharped images"):

                psh_rel = Path(psh_xml).relative_to(
                    base_dir_res)  # Use only relative paths
                image_folder = psh_rel.parents[1]
                psh_rel = Path(psh_xml).relative_to(base_dir_res /
                                                    image_folder)

                if validate_file_exists(psh_xml):
                    lst_psh_tiles = get_tiles_from_xml(psh_xml)
                else:
                    no_xml_err = f"No XML file found in {psh_xml}"
                    logging.warning(no_xml_err)
                    continue

                process_steps = []
                if len(lst_psh_tiles) > 1:
                    process_steps.append('merge')
                elif len(lst_psh_tiles) == 0:
                    xml_err = f"Could not find any tile in xmls for image {image_folder}."
                    logging.warning(xml_err)
                    # Without a tile there is no raster to read the dtype from: skip this image.
                    continue

                try:
                    with rasterio_raster_reader(
                            str(psh_xml.parent /
                                Path(lst_psh_tiles[0]))) as src:
                        psh_dtype = src.meta["dtype"]
                except rasterio.errors.RasterioIOError as e:
                    logging.warning(e)
                    continue

                # Determine output path: last "_"-separated section of the folder name replaced by "PREP"
                output_path = Path(
                    '_'.join(str(psh_rel.parent).split('_')[:-1]) + '_PREP')

                output_prep_path = Path(base_dir) / image_folder / output_path
                output_prep_path.mkdir(exist_ok=True)

                logging.debug(f"\nPansharp image found: {psh_rel}\n")

                if psh_dtype != 'uint8':
                    process_steps.append('scale')

                im_name = get_img_name_from_img_folder(
                    str(image_folder).split('/')[0])

                psh_tile_list = [
                    Path(base_dir) / image_folder / psh_rel.parent / Path(elem)
                    for elem in lst_psh_tiles
                ]
                img_info = ImageInfo(parent_folder=Path(base_dir),
                                     image_folder=image_folder,
                                     im_name=im_name,
                                     prep_folder=output_path,
                                     psh_tile_list=psh_tile_list,
                                     dtype=psh_dtype,
                                     psh_xml=psh_xml,
                                     process_steps=process_steps,
                                     mul_pan_info=psh_glob_pattern)

                glob_output_list.append(img_info)

    psh_ct = len(glob_output_list) - mul_pan_pairs_ct
    logging.info(
        f'Found {psh_ct} pansharped raster(s) with provided parameters')

    return glob_output_list
Esempio n. 11
0
def tile_list_glob(base_dir: str,
                   mul_pan_glob: List[str] = [],
                   mul_pan_str: List[str] = [],
                   psh_glob: List[str] = [],
                   extensions: List[str] = [],
                   out_csv: str = ""):
    """
    Glob through specified directories for (1) pairs of multispectral and panchromatic rasters or (2) pansharp rasters.
    Save as csv and/or return as list.
    :param base_dir: str
        Base directory where globbing will occur.
    :param mul_pan_glob: list of str
        List of list of patterns linking multispectral and panchrom. rasters. Patterns are a two-item list:
        (1) glob pattern to reach multispectral raster (excluding extension);
        (2) pattern to panchrom. raster from multispectral raster,
        e.g.: ["**/*_MUL/*-M*_P00?", "../*_PAN"].
    :param mul_pan_str: list of str
        List of list of string sections that identify multispectral and panchrom. rasters inside filename,
        e.g. [['-M', '-P'],["_MSI", "_PAN"]].
    :param psh_glob: list of str
        List of glob patterns to find panchromatic rasters.
    :param extensions: list of str
        List of extensions (suffixes) the raster files may bear, e.g. ["tif", "ntf"].
    :param out_csv: str
        Output csv where info about processed files and log messages will be saved.
    :return:
        list of lists (rows) containing info about files found, output pansharp name (if applies) and more.
    """
    assert len(mul_pan_glob) == len(
        mul_pan_str
    ), "Missing info about multispectral and panchromatic images"

    # Reorganize mul/pan glob and str info as list of lists each containing a tuple.
    # e.g. [('Sherbrooke/**/*_MUL/*-M*_P00?', '../*_PAN'), ('-M', '-P')]. See pansharp_glob()'s docstring for more info.
    mul_pan_info_list = [[tuple(mul_pan_glob[x]),
                          tuple(mul_pan_str[x])] for x in mul_pan_glob]

    os.chdir(
        "/home/valhass/Projects/preprocess-gdl/")  # Work in base directory

    # TODO: test execution of preprocess_glob.py
    import logging.config
    out_log_path = Path("/home/valhass/Projects/preprocess-gdl/logs")
    Path.mkdir(out_log_path, parents=True, exist_ok=True)
    # out_log_path.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(filename='logs/prep_glob.log', level=logging.DEBUG)
    logging.info("Started")

    base_dir_res = Path(base_dir).resolve(
    )  # Resolved path is useful in section 2 (search for panchromatic).

    if out_csv != "":
        out_csv = CsvLogger(out_csv=out_csv, info_type='tile')

    glob_output_list = []

    # 1. GLOB to all multispectral images in base directory using inputted pattern. Create generator from glob search.
    ################################################################################
    for mul_pan_info, ext in product(
            mul_pan_info_list, extensions
    ):  # FIXME: if list is empty, Nonetype will cause TypeError
        mul_glob_pattern = mul_pan_info[0][0] + "." + ext
        # FIXME: there may be compatibilty issues with glob's case sensitivity in Linux. Working ok on Windows.
        # More info: https://jdhao.github.io/2019/06/24/python_glob_case_sensitivity/
        mul_rasters_glob = base_dir_res.glob(mul_glob_pattern)

        # Loop through glob generator object and retrieve individual multispectral images
        for mul_raster in tqdm(mul_rasters_glob,
                               desc='Iterating through multispectral images'
                               ):  # mul_raster being a Path object
            mul_raster_rel = Path(mul_raster).relative_to(
                base_dir_res)  # Use only relative paths from here
            print(mul_raster_rel)

            image_folder = mul_raster_rel.parents[1]
            mul_raster_rel = Path(mul_raster).relative_to(base_dir_res /
                                                          image_folder)

            err_mgs = []
            length_err = "Check absolute path length. May exceed 260 characters."
            if not validate_file_exists(image_folder / mul_raster_rel):
                err_mgs.append(length_err)

            # 2. Find panchromatic image with relative glob pattern from multispectral pattern
            ################################################################################
            pan_glob_pattern = mul_pan_info[0][1] + "/*." + ext
            # assume panchromatic file has same extension as multispectral
            pan_rasters_glob = sorted(
                (image_folder / mul_raster_rel.parent).glob(pan_glob_pattern))
            if len(pan_rasters_glob) == 0:
                missing_pan = f"The provided glob pattern {pan_glob_pattern} could not locate a potential" \
                              f"panchromatic raster to match {mul_raster_rel}." \
                              f"Skipping to next multispectral raster..."
                logging.warning(missing_pan)
                err_mgs.append(missing_pan)
                continue
            # Replace string that identifies the raster as a multispectral for one identifying panchromatic raster
            pan_best_guess = str(mul_raster_rel.name).replace(
                mul_pan_info[1][0], mul_pan_info[1][1])
            # Guess the panchromatic image's path using directory from glob results above. This file may not exist.
            pan_best_guess_rel_path = (pan_rasters_glob[0].parent.resolve() /
                                       pan_best_guess).relative_to(
                                           base_dir_res / image_folder)
            # Make a list of strings from paths given by glob results above.
            pan_rasters_str = []
            for potential_pan in pan_rasters_glob:
                # Resolve paths to avoid path length problems in Windows,
                # i.e. discard all relative references (ex.: "mul_dir/../pan_dir") making path longer
                pot_pan_dir = potential_pan.parent.resolve()
                pot_pan_rel = pot_pan_dir.joinpath(
                    potential_pan.name).relative_to(base_dir_res /
                                                    image_folder)
                pan_rasters_str.append(str(pot_pan_rel))
            # Get closest match between guessed name for panchromatic image and glob file names
            pan_raster_rel = Path(
                get_close_matches(str(pan_best_guess_rel_path),
                                  pan_rasters_str)[0])
            print(pan_raster_rel)
            if not validate_file_exists(image_folder / pan_raster_rel):
                no_panchro_err = f"Panchromatic raster not found to match multispectral raster {mul_raster_rel}"
                logging.warning(no_panchro_err)
                err_mgs.append(no_panchro_err)
                continue

            # 3. Define parameters for future pansharp (and more), now that we've found mul/pan pair.
            ################################################################################
            try:
                raster = rasterio_raster_reader(
                    str(image_folder / mul_raster_rel)
                )  # Set output dtype as original multispectral dtype
            except rasterio.errors.RasterioIOError as e:
                logging.warning(e)
                continue
            dtype = raster.meta["dtype"]

            logging.debug(f"\nMultispectral image: {mul_raster_rel}\n"
                          f"Panchromatic image found: {pan_raster_rel}\n"
                          f"Multispectral datatype: {dtype}\n")

            # # Determine output path
            p = re.compile('_M\w\w')
            output_path = Path(p.sub('_PREP', str(mul_raster_rel.parent)))
            output_prep_path = Path(base_dir) / image_folder / output_path
            output_prep_path.mkdir(exist_ok=True)
            if not output_prep_path.is_dir():
                raise ValueError(f"Could not create folder {output_prep_path}")

            process_steps = ['psh']
            if dtype != 'uint8':
                process_steps.append('scale')

            p = re.compile('_R\wC\w')
            mul_xml_name = Path(p.sub('', str(mul_raster_rel.stem)) + '.XML')
            mul_xml = Path(
                base_dir) / image_folder / mul_raster_rel.parent / mul_xml_name
            if not validate_file_exists(mul_xml):
                no_xml_err = f"No XML file found in {mul_xml}"
                logging.warning(no_xml_err)
                err_mgs.append(no_xml_err)
                continue

            # create new row and append to existing records in glob_output_list.
            tile_info = TileInfo(parent_folder=Path(base_dir),
                                 image_folder=image_folder,
                                 mul_pan_patern=mul_pan_info,
                                 mul_tile=mul_raster_rel,
                                 pan_tile=pan_raster_rel,
                                 prep_folder=output_path,
                                 dtype=dtype,
                                 process_steps=process_steps,
                                 mul_xml=mul_xml)
            # row = [str(base_dir), str(mul_raster_rel), str(pan_raster_rel), dtype, str(output_psh_rel), pansharp_method,
            #        str(output_cog_rel), err_mgs]
            # glob_output_list.append(tuple(row))
            glob_output_list.append(tile_info)
            out_csv.write_row(tile_info)

    mul_pan_pairs_ct = len(glob_output_list)
    logging.info(
        f"Found {mul_pan_pairs_ct} pair(s) of multispectral and panchromatic rasters with provided parameters"
    )

    # 4. Find already pansharped images with a certain name pattern
    ################################################################################
    if psh_glob:  # if config file contains any search pattern, glob.
        for psh_glob_item, ext in product(psh_glob, extensions):
            psh_glob_pattern = psh_glob_item + "." + ext
            psh_rasters_glob = base_dir_res.glob(psh_glob_pattern)
            for psh_raster in tqdm(
                    psh_rasters_glob,
                    desc="Iterating through already pansharped images"):
                try:
                    raster = rasterio_raster_reader(
                        str(psh_raster)
                    )  # Set output dtype as original multispectral dtype
                except rasterio.errors.RasterioIOError as e:
                    logging.warning(e)
                    continue
                psh_dtype = raster.meta["dtype"]

                psh_raster_rel = Path(psh_raster).relative_to(
                    base_dir_res)  # Use only relative paths
                image_folder = psh_raster_rel.parents[1]
                psh_raster_rel = Path(psh_raster).relative_to(base_dir_res /
                                                              image_folder)

                # # Determine output path

                output_path = Path(
                    '_'.join(str(psh_raster_rel.parent).split('_')[:-1]) +
                    '_PREP')

                output_prep_path = Path(base_dir) / image_folder / output_path
                output_prep_path.mkdir(exist_ok=True)

                # output_cog_rel = psh_raster_rel.parent / (psh_raster_rel.stem + "-" + psh_dtype + "-cog" + psh_raster_rel.suffix)
                logging.debug(f"\nPansharp image found: {psh_raster_rel}\n")

                p = re.compile('_R\wC\w')
                mul_xml_name = Path(
                    p.sub('', str(psh_raster_rel.stem)) + '.XML')
                mul_xml = Path(
                    base_dir
                ) / image_folder / psh_raster_rel.parent / mul_xml_name
                if not validate_file_exists(mul_xml):
                    no_xml_err = f"No XML file found in {mul_xml}"
                    logging.warning(no_xml_err)
                    continue

                process_steps = []
                if psh_dtype != 'uint8':
                    process_steps.append('scale')
                tile_info = TileInfo(parent_folder=Path(base_dir),
                                     image_folder=image_folder,
                                     psh_tile=psh_raster_rel,
                                     prep_folder=output_path,
                                     dtype=psh_dtype,
                                     process_steps=process_steps,
                                     last_processed_fp=Path(base_dir) /
                                     image_folder / psh_raster_rel,
                                     mul_xml=mul_xml,
                                     mul_pan_patern=['None'])

                glob_output_list.append(tile_info)
                out_csv.write_row(tile_info)

    psh_ct = len(glob_output_list) - mul_pan_pairs_ct
    logging.info(
        f'Found {psh_ct} pansharped raster(s) with provided parameters')

    return glob_output_list