Example #1
0
def convert_tif_to_hdf(input_path,
                       output_path,
                       key_path="entry/data",
                       crop=(0, 0, 0, 0),
                       pattern=None,
                       **options):
    """
    Convert a folder of tif files to a hdf/nxs file.

    Parameters
    ----------
    input_path : str
        Folder path to the tif files.
    output_path : str
        Path to the hdf/nxs file. If the extension is not ".hdf", ".h5" or
        ".nxs", it is replaced by ".hdf".
    key_path : str, optional
        Key path to the dataset.
    crop : tuple of int, optional
        Crop the images from the edges, i.e.
        crop = (crop_top, crop_bottom, crop_left, crop_right).
        All values must be non-negative and must leave at least one row and
        one column of the image.
    pattern : str, optional
        Used to find tif files with names matching the pattern.
    **options
        Extra keyword arguments forwarded unchanged to
        ``losa.open_hdf_stream`` (used to add metadata, e.g.
        ``**{"entry/angles": angles, "entry/energy": 53}``).

    Returns
    -------
    str
        Path to the hdf/nxs file.

    Raises
    ------
    ValueError
        If `crop` does not hold four non-negative values, if no tif file is
        found, or if the crop removes the whole image.
    """
    # Validate the crop before touching the disk: a negative value would
    # silently shift the slice below instead of cropping.
    cr_top, cr_bottom, cr_left, cr_right = crop
    if min(cr_top, cr_bottom, cr_left, cr_right) < 0:
        raise ValueError("Crop values must be non-negative: {}".format(crop))
    if pattern is None:
        list_file = losa.find_file(input_path + "/*.tif*")
    else:
        list_file = losa.find_file(input_path + "/*" + pattern + "*.tif*")
    depth = len(list_file)
    if depth == 0:
        raise ValueError("No tif files in the folder: {}".format(input_path))
    # The first image defines the size assumed for every image in the stack.
    (height, width) = np.shape(losa.load_image(list_file[0]))
    cr_height = height - cr_top - cr_bottom
    cr_width = width - cr_left - cr_right
    if cr_height <= 0 or cr_width <= 0:
        raise ValueError(
            "Crop {} is too large for images of shape {}".format(
                crop, (height, width)))
    file_base, file_ext = os.path.splitext(output_path)
    if file_ext not in (".hdf", ".h5", ".nxs"):
        file_ext = '.hdf'
    output_path = file_base + file_ext
    data_out = losa.open_hdf_stream(output_path, (depth, cr_height, cr_width),
                                    key_path=key_path,
                                    overwrite=True,
                                    **options)
    for i, fname in enumerate(list_file):
        data_out[i] = losa.load_image(fname)[cr_top:cr_top + cr_height,
                                             cr_left:cr_left + cr_width]
    return output_path
Example #2
0
 def test_load_image(self):
     # Round trip: write a random 64x64 image, read it back, and check that
     # the loaded array has exactly two dimensions.
     tif_path = "data/img.tif"
     image = np.random.rand(64, 64)
     losa.save_image(tif_path, image)
     loaded = losa.load_image(tif_path)
     num_dims = len(loaded.shape)
     self.assertTrue(num_dims == 2)
Example #3
0
def get_statical_information_dataset(input_,
                                     percentile=(5, 95),
                                     skip=5,
                                     denoise=False,
                                     key_path=None):
    """
    Get statistical information of a dataset. This can be a folder of tif
    files, a hdf file, or a 3D array. Only every `skip`-th slice is read;
    the per-slice statistics are then combined across the sampled slices.

    Parameters
    ----------
    input_ : str, hdf file, or array_like
        It can be a folder path to tif files, a hdf file, or a 3D array.
    percentile : tuple of floats
        Tuple of (min_percentile, max_percentile) to compute.
        Must be between 0 and 100 inclusive.
    skip : int
        Skipping step of reading input. Must be >= 1.
    denoise: bool, optional
        Enable/disable denoising before extracting statistical information.
    key_path : str, optional
        Key path to the dataset if the input is the hdf file.

    Returns
    -------
    gmin : float
        The minimum of the per-slice minimum values.
    gmax : float
        The maximum of the per-slice maximum values.
    min_percent : float
        The minimum of the first computed percentile over the slices.
    max_percent : float
        The maximum of the last computed percentile over the slices.
    mean : float
        The mean, over the sampled slices, of the per-slice statistic
        stored in column 5.
    median : float
        The median, over the sampled slices, of the per-slice statistic
        stored in column 4.
    variance : float
        The mean of the per-slice variance values.

    Raises
    ------
    ValueError
        If the folder holds no tif files, the file extension is not
        supported, `key_path` is missing for a hdf file, the dataset is
        empty, or `skip` is smaller than 1.
    """
    if isinstance(input_, str) and (os.path.splitext(input_)[-1] == ""):
        list_file = losa.find_file(input_ + "/*.tif*")
        depth = len(list_file)
        if depth == 0:
            raise ValueError("No tif files in the folder: {}".format(input_))

        def read_slice(idx):
            return losa.load_image(list_file[idx])
    else:
        if isinstance(input_, str):
            file_ext = os.path.splitext(input_)[-1]
            if file_ext not in (".hdf", ".h5", ".nxs"):
                raise ValueError(
                    "Can't open this type of file format {}".format(file_ext))
            if key_path is None:
                raise ValueError(
                    "Please provide the key path to the dataset!!!")
            input_ = losa.load_hdf(input_, key_path)
        depth = len(input_)
        # Mirror the tif-folder branch: an empty dataset is reported
        # explicitly instead of failing later while indexing the statistics.
        if depth == 0:
            raise ValueError("The dataset is empty!!!")

        def read_slice(idx):
            return input_[idx]
    if skip < 1:
        raise ValueError("skip must be >= 1. Given: {}".format(skip))
    list_stat = []
    for i in range(0, depth, skip):
        mat = read_slice(i)
        # NOTE(review): `denoise` is also forwarded to
        # get_statical_information below; if that helper filters as well,
        # each slice is smoothed twice - confirm against its definition.
        if denoise is True:
            mat = gaussian_filter(mat, 2)
        list_stat.append(get_statical_information(mat, percentile,
                                                  denoise))
    # One row per sampled slice, one column per statistic.
    list_stat = np.asarray(list_stat)
    gmin = np.min(list_stat[:, 0])
    gmax = np.max(list_stat[:, 1])
    min_percent = np.min(list_stat[:, 2])
    max_percent = np.max(list_stat[:, 3])
    # NOTE(review): this assumes column 4 holds the per-slice median and
    # column 5 the per-slice mean. get_statical_information is defined
    # elsewhere - confirm its return order matches.
    median = np.median(list_stat[:, 4])
    mean = np.mean(list_stat[:, 5])
    variance = np.mean(list_stat[:, 6])
    return gmin, gmax, min_percent, max_percent, mean, median, variance
Example #4
0
def rescale_dataset(input_,
                    output,
                    nbit=16,
                    minmax=None,
                    skip=None,
                    key_path=None):
    """
    Rescale a dataset to 8-bit or 16-bit data-type. The dataset can be a
    folder of tif files, a hdf file, or a 3D array.

    Parameters
    ----------
    input_ : str, array_like
        It can be a folder path to tif files, a hdf file, or 3D array.
    output : str, None
        It can be a folder path, a hdf file path, or None (memory consuming).
        A path with an extension is treated as a hdf file, a path without
        one as a folder of tif files. The target folder must not exist yet.
    nbit : {8,16}
        Rescaled data-type: 8-bit or 16-bit. Any value other than 8 selects
        16-bit when writing to a hdf file.
    minmax : tuple of float, or None
        Minimum and maximum values used for rescaling. They are calculated if
        None is given.
    skip : int or None
        Skipping step of reading input used for getting statistical
        information. Defaults to 15% of the number of slices (rounded up).
    key_path : str, optional
        Key path to the dataset if the input is the hdf file.

    Returns
    -------
    array_like or None
        If output is None, returning an 3D array. Otherwise None.

    Raises
    ------
    ValueError
        If the output folder already exists, the output or input file
        extension is not supported, no tif file is found, or `key_path` is
        missing for a hdf input.
    """
    hdf_exts = (".hdf", ".h5", ".nxs")
    to_hdf = False
    if output is not None:
        out_base, out_ext = os.path.splitext(output)
        to_hdf = out_ext != ""
        # For a hdf output the check is applied to its parent folder.
        folder = os.path.dirname(output) if to_hdf else out_base
        if os.path.exists(folder):
            raise ValueError("Folder exists!!! Please choose another path!!!")
        # Reject a bad extension before any data is read.
        if to_hdf and out_ext not in hdf_exts:
            raise ValueError("File extension must be hdf, h5, or nxs")
    from_tif = isinstance(input_, str) and (
        os.path.splitext(input_)[-1] == "")
    if from_tif:
        list_file = losa.find_file(input_ + "/*.tif*")
        depth = len(list_file)
        if depth == 0:
            raise ValueError("No tif files in the folder: {}".format(input_))
    else:
        if isinstance(input_, str):
            file_ext = os.path.splitext(input_)[-1]
            if file_ext not in hdf_exts:
                raise ValueError(
                    "Can't open this type of file format {}".format(file_ext))
            if key_path is None:
                raise ValueError(
                    "Please provide the key path to the dataset!!!")
            input_ = losa.load_hdf(input_, key_path)
        (depth, height, width) = input_.shape
    if minmax is None:
        if skip is None:
            skip = int(np.ceil(0.15 * depth))
        (gmin, gmax) = get_statical_information_dataset(
            input_, skip=skip, key_path=key_path)[0:2]
    else:
        (gmin, gmax) = minmax
    data_out = None
    if to_hdf:
        if from_tif:
            # The first image defines the size of every slice in the stack.
            (height, width) = np.shape(losa.load_image(list_file[0]))
        data_type = "uint8" if nbit == 8 else "uint16"
        data_out = losa.open_hdf_stream(output, (depth, height, width),
                                        key_path="rescale/data",
                                        data_type=data_type,
                                        overwrite=False)
    data_res = []
    for i in range(depth):
        mat = losa.load_image(list_file[i]) if from_tif else input_[i]
        mat = rescale(mat, nbit=nbit, minmax=(gmin, gmax))
        if output is None:
            data_res.append(mat)
        elif to_hdf:
            data_out[i] = mat
        else:
            # Zero-pad to at least 5 digits; larger indices keep all their
            # digits so that file names never collide.
            out_name = "{:05d}".format(i)
            losa.save_image(output + "/img_" + out_name + ".tif", mat)
    if output is None:
        return np.asarray(data_res)
    return None
Example #5
0
def downsample_dataset(input_,
                       output,
                       cell_size,
                       method="mean",
                       key_path=None):
    """
    Downsample a dataset. This can be a folder of tif files, a hdf file,
    or a 3D array. Trailing slices/rows/columns that do not fill a complete
    cell are discarded.

    Parameters
    ----------
    input_ : str, array_like
        It can be a folder path to tif files, a hdf file, or 3D array.
    output : str, None
        It can be a folder path, a hdf file path, or None (memory consuming).
        A path with an extension is treated as a hdf file, a path without
        one as a folder of tif files. The target folder must not exist yet.
    cell_size : int or tuple of int
        Window size along axes used for grouping pixels. An integer is
        applied to all three axes. Every size must be >= 1.
    method : {"mean", "median", "max", "min"}
        Downsampling method. Any other value falls back to "mean".
    key_path : str, optional
        Key path to the dataset if the input is the hdf file.

    Returns
    -------
    array_like or None
        If output is None, returning an 3D array. Otherwise None.

    Raises
    ------
    ValueError
        If the output folder already exists, a file extension is not
        supported, no tif file is found, `key_path` is missing for a hdf
        input, or `cell_size` is not positive or exceeds the data size.
    """
    hdf_exts = (".hdf", ".h5", ".nxs")
    to_hdf = False
    if output is not None:
        out_base, out_ext = os.path.splitext(output)
        to_hdf = out_ext != ""
        # For a hdf output the check is applied to its parent folder.
        folder = os.path.dirname(output) if to_hdf else out_base
        if os.path.exists(folder):
            raise ValueError("Folder exists!!! Please choose another path!!!")
    dsp_method = {"median": np.median,
                  "max": np.max,
                  "min": np.amin}.get(method, np.mean)
    if isinstance(cell_size, (int, np.integer)):
        cell_size = (cell_size, cell_size, cell_size)
    # A zero size would divide by zero below; a negative one makes no sense.
    if any(size < 1 for size in cell_size):
        raise ValueError("Incorrect cell size {}".format(cell_size))
    from_tif = isinstance(input_, str) and (
        os.path.splitext(input_)[-1] == "")
    if from_tif:
        list_file = losa.find_file(input_ + "/*.tif*")
        depth = len(list_file)
        if depth == 0:
            raise ValueError("No tif files in the folder: {}".format(input_))
        # The first image defines the size of every slice in the stack.
        (height, width) = np.shape(losa.load_image(list_file[0]))
    else:
        if isinstance(input_, str):
            file_ext = os.path.splitext(input_)[-1]
            if file_ext not in hdf_exts:
                raise ValueError(
                    "Can't open this type of file format {}".format(file_ext))
            if key_path is None:
                raise ValueError(
                    "Please provide the key path to the dataset!!!")
            input_ = losa.load_hdf(input_, key_path)
        (depth, height, width) = input_.shape
    depth_dsp = depth // cell_size[0]
    height_dsp = height // cell_size[1]
    width_dsp = width // cell_size[2]
    if (depth_dsp == 0) or (height_dsp == 0) or (width_dsp == 0):
        raise ValueError("Incorrect cell size {}".format(cell_size))
    height_crop = height_dsp * cell_size[1]
    width_crop = width_dsp * cell_size[2]

    def reduce_cells(blocks):
        # blocks has shape (n, cell_d, height_dsp, cell_h, width_dsp, cell_w);
        # collapse the three cell axes one after another.
        return dsp_method(dsp_method(dsp_method(blocks, axis=-1), axis=1),
                          axis=2)

    if (not from_tif) and (output is None):
        # Whole volume is available in memory: reduce it in one go.
        volume = input_[:depth_dsp * cell_size[0], :height_crop, :width_crop]
        volume = volume.reshape(depth_dsp, cell_size[0], height_dsp,
                                cell_size[1], width_dsp, cell_size[2])
        return np.asarray(reduce_cells(volume))
    data_out = None
    if to_hdf:
        if out_ext not in hdf_exts:
            raise ValueError("File extension must be hdf, h5, or nxs")
        # Only open a hdf stream when the output really is a hdf file; a
        # folder output is written as individual tif files below.
        data_out = losa.open_hdf_stream(
            output, (depth_dsp, height_dsp, width_dsp),
            key_path="downsample/data",
            overwrite=False)
    data_dsp = []
    for num in range(depth_dsp):
        start = num * cell_size[0]
        stop = start + cell_size[0]
        if from_tif:
            mat = np.asarray(
                [losa.load_image(list_file[j]) for j in range(start, stop)])
            mat = mat[:, :height_crop, :width_crop]
        else:
            mat = input_[start:stop, :height_crop, :width_crop]
        mat = mat.reshape(1, cell_size[0], height_dsp, cell_size[1],
                          width_dsp, cell_size[2])
        mat_dsp = reduce_cells(mat)
        if output is None:
            data_dsp.append(mat_dsp[0])
        elif to_hdf:
            data_out[num] = mat_dsp[0]
        else:
            # Zero-pad to at least 5 digits; larger indices keep all their
            # digits so that file names never collide.
            out_name = "{:05d}".format(num)
            losa.save_image(output + "/img_" + out_name + ".tif",
                            mat_dsp[0])
    if output is None:
        return np.asarray(data_dsp)
    return None
Example #6
0
import timeit
import numpy as np
import multiprocessing as mp
import algotom.io.loadersaver as losa
import algotom.prep.correction as corr
from joblib import Parallel, delayed

# Scan number; used below to build both the input file path and the name of
# the output file.
scan_num = 129441
input_base = "/dls/i13/data/2021/mg26241-2/raw/"
output_base = "/dls/i13/data/2021/mg26241-2/processing/preprocessed/"
# Path of the scan file: <input_base><scan_num>.nxs
file_path = input_base + str(scan_num) + ".nxs"
# Name of the output file only; it is not joined with output_base here.
ofile_name = str(scan_num) + "_processed.nxs"

# Load an MTF window determined outside Algotom.
mtf_win = losa.load_image("/dls/i12/data/2020/cm26476-4/processing/mtf_window.tif")
# NOTE(review): presumably the padding width used together with the MTF
# window - confirm against where mtf_pad is consumed.
mtf_pad = 150
# Load distortion coefficients determined using the Discorpy package.
xcenter, ycenter, list_fact = losa.load_distortion_coefficient(
    "/dls/i12/data/2020/cm26476-4/processing/coefficients_bw.txt")

# One less than the number of CPUs reported by multiprocessing.
# NOTE(review): this evaluates to 0 on a single-CPU machine.
ncore = mp.cpu_count() - 1 # To process data in parallel.
chunk = 32  # Number of images to be loaded and processed in one go.

# Crop images after the distortion correction (pincushion type) to remove
# unwanted values around the edges.
crop_top = 20
crop_bottom = 20
crop_left = 20
crop_right = 20