def convert_tif_to_hdf(input_path, output_path, key_path="entry/data", crop=(0, 0, 0, 0), pattern=None, **options): """ Convert a folder of tif files to a hdf/nxs file. Parameters ---------- input_path : str Folder path to the tif files. output_path : str Path to the hdf/nxs file. key_path : str, optional Key path to the dataset. crop : tuple of int, optional Crop the images from the edges, i.e. crop = (crop_top, crop_bottom, crop_left, crop_right). pattern : str, optional Used to find tif files with names matching the pattern. options : dict, optional Add metadata. E.g. options={"entry/angles": angles, "entry/energy": 53}. Returns ------- str Path to the hdf/nxs file. """ if pattern is None: list_file = losa.find_file(input_path + "/*.tif*") else: list_file = losa.find_file(input_path + "/*" + pattern + "*.tif*") depth = len(list_file) if depth == 0: raise ValueError("No tif files in the folder: {}".format(input_path)) (height, width) = np.shape(losa.load_image(list_file[0])) file_base, file_ext = os.path.splitext(output_path) if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): file_ext = '.hdf' output_path = file_base + file_ext cr_top, cr_bottom, cr_left, cr_right = crop cr_height = height - cr_top - cr_bottom cr_width = width - cr_left - cr_right data_out = losa.open_hdf_stream(output_path, (depth, cr_height, cr_width), key_path=key_path, overwrite=True, **options) for i, fname in enumerate(list_file): data_out[i] = losa.load_image(fname)[cr_top:cr_height + cr_top, cr_left:cr_width + cr_left] return output_path
def test_load_image(self): file_path = "data/img.tif" losa.save_image(file_path, np.random.rand(64, 64)) mat = losa.load_image(file_path) self.assertTrue(len(mat.shape) == 2)
def get_statical_information_dataset(input_, percentile=(5, 95), skip=5, denoise=False, key_path=None): """ Get statical information of a dataset. This can be a folder of tif files, a hdf file, or a 3D array. Parameters ---------- input_ : str, hdf file, or array_like It can be a folder path to tif files, a hdf file, or a 3D array. percentile : tuple of floats Tuple of (min_percentile, max_percentile) to compute. Must be between 0 and 100 inclusive. skip : int Skipping step of reading input. denoise: bool, optional Enable/disable denoising before extracting statistical information. key_path : str, optional Key path to the dataset if the input is the hdf file. Returns ------- gmin : float The global minimum value of the data array. gmax : float The global maximum value of the data array. min_percent : float The global min of the first computed percentile of the data array. max_percent : tuple of floats The global min of the last computed percentile of the data array. mean : float The mean of the data array. median : float The median of the data array. variance : float The mean of the variance of the data array. """ if isinstance(input_, str) and (os.path.splitext(input_)[-1] == ""): list_file = losa.find_file(input_ + "/*.tif*") depth = len(list_file) if depth == 0: raise ValueError("No tif files in the folder: {}".format(input_)) list_stat = [] for i in range(0, depth, skip): mat = losa.load_image(list_file[i]) if denoise is True: mat = gaussian_filter(mat, 2) list_stat.append(get_statical_information(mat, percentile, denoise)) else: if isinstance(input_, str): file_ext = os.path.splitext(input_)[-1] if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError( "Can't open this type of file format {}".format(file_ext)) if key_path is None: raise ValueError( "Please provide the key path to the dataset!!!") input_ = losa.load_hdf(input_, key_path) depth = len(input_) list_stat = [] for i in range(0, depth, skip): mat = input_[i] if denoise is True: mat = gaussian_filter(mat, 2) list_stat.append(get_statical_information(mat, percentile, denoise)) list_stat = np.asarray(list_stat) gmin = np.min(list_stat[:, 0]) gmax = np.max(list_stat[:, 1]) min_percent = np.min(list_stat[:, 2]) max_percent = np.max(list_stat[:, 3]) median = np.median(list_stat[:, 4]) mean = np.mean(list_stat[:, 5]) variance = np.mean(list_stat[:, 6]) return gmin, gmax, min_percent, max_percent, mean, median, variance
def rescale_dataset(input_, output, nbit=16, minmax=None, skip=None, key_path=None): """ Rescale a dataset to 8-bit or 16-bit data-type. The dataset can be a folder of tif files, a hdf file, or a 3D array. Parameters ---------- input_ : str, array_like It can be a folder path to tif files, a hdf file, or 3D array. output : str, None It can be a folder path, a hdf file path, or None (memory consuming). nbit : {8,16} Rescaled data-type: 8-bit or 16-bit. minmax : tuple of float, or None Minimum and maximum values used for rescaling. They are calculated if None is given. skip : int or None Skipping step of reading input used for getting statistical information. key_path : str, optional Key path to the dataset if the input is the hdf file. Returns ------- array_like or None If output is None, returning an 3D array. """ if output is not None: file_base, file_ext = os.path.splitext(output) if file_ext != "": file_base = os.path.dirname(output) if os.path.exists(file_base): raise ValueError("Folder exists!!! Please choose another path!!!") if isinstance(input_, str) and (os.path.splitext(input_)[-1] == ""): list_file = losa.find_file(input_ + "/*.tif*") depth = len(list_file) if depth == 0: raise ValueError("No tif files in the folder: {}".format(input_)) if minmax is None: if skip is None: skip = int(np.ceil(0.15 * depth)) (gmin, gmax) = get_statical_information_dataset(input_, skip=skip)[0:2] else: (gmin, gmax) = minmax if output is not None: file_base, file_ext = os.path.splitext(output) if file_ext != "": if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError("File extension must be hdf, h5, or nxs") output = file_base + file_ext (height, width) = np.shape(losa.load_image(list_file[0])) if nbit == 8: data_type = "uint8" else: data_type = "uint16" data_out = losa.open_hdf_stream(output, (depth, height, width), key_path="rescale/data", data_type=data_type, overwrite=False) data_res = [] for i in range(0, depth): mat = rescale(losa.load_image(list_file[i]), nbit=nbit, minmax=(gmin, gmax)) if output is None: data_res.append(mat) else: file_base, file_ext = os.path.splitext(output) if file_ext == "": out_name = "0000" + str(i) losa.save_image(output + "/img_" + out_name[-5:] + ".tif", mat) else: data_out[i] = mat else: if isinstance(input_, str): file_ext = os.path.splitext(input_)[-1] if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError( "Can't open this type of file format {}".format(file_ext)) if key_path is None: raise ValueError( "Please provide the key path to the dataset!!!") input_ = losa.load_hdf(input_, key_path) (depth, height, width) = input_.shape if minmax is None: if skip is None: skip = int(np.ceil(0.15 * depth)) (gmin, gmax) = get_statical_information_dataset(input_, skip=skip, key_path=key_path)[0:2] else: (gmin, gmax) = minmax data_res = [] if output is not None: file_base, file_ext = os.path.splitext(output) if file_ext != "": if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError("File extension must be hdf, h5, or nxs") output = file_base + file_ext if nbit == 8: data_type = "uint8" else: data_type = "uint16" data_out = losa.open_hdf_stream(output, (depth, height, width), key_path="rescale/data", data_type=data_type, overwrite=False) for i in range(0, depth): mat = rescale(input_[i], nbit=nbit, minmax=(gmin, gmax)) if output is None: data_res.append(mat) else: file_base, file_ext = os.path.splitext(output) if file_ext != "": data_out[i] = mat else: out_name = "0000" + str(i) losa.save_image(output + "/img_" + out_name[-5:] + ".tif", mat) if output is None: return np.asarray(data_res)
def downsample_dataset(input_, output, cell_size, method="mean", key_path=None): """ Downsample a dataset. This can be a folder of tif files, a hdf file, or a 3D array. Parameters ---------- input_ : str, array_like It can be a folder path to tif files, a hdf file, or 3D array. output : str, None It can be a folder path, a hdf file path, or None (memory consuming). cell_size : int or tuple of int Window size along axes used for grouping pixels. method : {"mean", "median", "max", "min"} Downsampling method. key_path : str, optional Key path to the dataset if the input is the hdf file. Returns ------- array_like or None If output is None, returning an 3D array. """ if output is not None: file_base, file_ext = os.path.splitext(output) if file_ext != "": file_base = os.path.dirname(output) if os.path.exists(file_base): raise ValueError("Folder exists!!! Please choose another path!!!") if method == "median": dsp_method = np.median elif method == "max": dsp_method = np.max elif method == "min": dsp_method = np.amin else: dsp_method = np.mean if isinstance(cell_size, int): cell_size = (cell_size, cell_size, cell_size) if isinstance(input_, str) and (os.path.splitext(input_)[-1] == ""): list_file = losa.find_file(input_ + "/*.tif*") depth = len(list_file) if depth == 0: raise ValueError("No tif files in the folder: {}".format(input_)) (height, width) = np.shape(losa.load_image(list_file[0])) depth_dsp = depth // cell_size[0] height_dsp = height // cell_size[1] width_dsp = width // cell_size[2] num = 0 if (depth_dsp != 0) and (height_dsp != 0) and (width_dsp != 0): if output is not None: file_base, file_ext = os.path.splitext(output) if file_ext != "": if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError( "File extension must be hdf, h5, or nxs") output = file_base + file_ext data_out = losa.open_hdf_stream( output, (depth_dsp, height_dsp, width_dsp), key_path="downsample/data", overwrite=False) data_dsp = [] for i in range(0, depth, cell_size[0]): if (i + cell_size[0]) > depth: break else: mat = [] for j in range(i, i + cell_size[0]): mat.append(losa.load_image(list_file[j])) mat = np.asarray(mat) mat = mat[:, :height_dsp * cell_size[1], :width_dsp * cell_size[2]] mat = mat.reshape(1, cell_size[0], height_dsp, cell_size[1], width_dsp, cell_size[2]) mat_dsp = dsp_method(dsp_method(dsp_method(mat, axis=-1), axis=1), axis=2) if output is None: data_dsp.append(mat_dsp[0]) else: if file_ext == "": out_name = "0000" + str(num) losa.save_image( output + "/img_" + out_name[-5:] + ".tif", mat_dsp[0]) else: data_out[num] = mat_dsp[0] num += 1 else: raise ValueError("Incorrect cell size {}".format(cell_size)) else: if isinstance(input_, str): file_ext = os.path.splitext(input_)[-1] if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError( "Can't open this type of file format {}".format(file_ext)) if key_path is None: raise ValueError( "Please provide the key path to the dataset!!!") input_ = losa.load_hdf(input_, key_path) (depth, height, width) = input_.shape depth_dsp = depth // cell_size[0] height_dsp = height // cell_size[1] width_dsp = width // cell_size[2] if (depth_dsp != 0) and (height_dsp != 0) and (width_dsp != 0): if output is None: input_ = input_[:depth_dsp * cell_size[0], :height_dsp * cell_size[1], :width_dsp * cell_size[2]] input_ = input_.reshape(depth_dsp, cell_size[0], height_dsp, cell_size[1], width_dsp, cell_size[2]) data_dsp = dsp_method(dsp_method(dsp_method(input_, axis=-1), axis=1), axis=2) else: file_base, file_ext = os.path.splitext(output) if file_ext != "": if not (file_ext == '.hdf' or file_ext == '.h5' or file_ext == ".nxs"): raise ValueError( "File extension must be hdf, h5, or nxs") output = file_base + file_ext data_out = losa.open_hdf_stream( output, (depth_dsp, height_dsp, width_dsp), key_path="downsample/data", overwrite=False) num = 0 for i in range(0, depth, cell_size[0]): if (i + cell_size[0]) > depth: break else: mat = input_[i:i + cell_size[0], :height_dsp * cell_size[1], :width_dsp * cell_size[2]] mat = mat.reshape(1, cell_size[0], height_dsp, cell_size[1], width_dsp, cell_size[2]) mat_dsp = dsp_method(dsp_method(dsp_method(mat, axis=-1), axis=1), axis=2) if file_ext != "": data_out[num] = mat_dsp[0] else: out_name = "0000" + str(num) losa.save_image( output + "/img_" + out_name[-5:] + ".tif", mat_dsp[0]) num += 1 else: raise ValueError("Incorrect cell size {}".format(cell_size)) if output is None: return np.asarray(data_dsp)
import timeit import numpy as np import multiprocessing as mp import algotom.io.loadersaver as losa import algotom.prep.correction as corr from joblib import Parallel, delayed scan_num = 129441 input_base = "/dls/i13/data/2021/mg26241-2/raw/" output_base = "/dls/i13/data/2021/mg26241-2/processing/preprocessed/" file_path = input_base + str(scan_num) + ".nxs" ofile_name = str(scan_num) + "_processed.nxs" # Load an MTF window determined outside Algotom. mtf_win = losa.load_image("/dls/i12/data/2020/cm26476-4/processing/mtf_window.tif") mtf_pad = 150 # Load distortion coefficients determined using the Discorpy package. xcenter, ycenter, list_fact = losa.load_distortion_coefficient( "/dls/i12/data/2020/cm26476-4/processing/coefficients_bw.txt") ncore = mp.cpu_count() - 1 # To process data in parallel. chunk = 32 # Number of images to be loaded and processed in one go. # Crop images after the distortion correction (pincushion type) to remove # unwanted values around the edges. crop_top = 20 crop_bottom = 20 crop_left = 20 crop_right = 20