Beispiel #1
0
    def fit(
        self,
        x,
        y: Sequence[MedicalVolume],
        mask=None,
        copy_headers: bool = True,
    ):
        """Fit a polynomial to the data using linear least squares.

        Before delegating to the parent implementation, this verifies that
        ``x`` and every element of ``y`` live on the same device.

        Args:
            x (array-like): Same as :meth:`CurveFitter.fit`.
            y (Sequence[MedicalVolume]): Same as :meth:`CurveFitter.fit`.
            mask (MedicalVolume or ndarray): Same as :meth:`CurveFitter.fit`.
            copy_headers (bool, optional): If ``True``, headers will be deep copied.
                If ``False``, headers will not be copied and the returned values
                will not have headers.

        Returns:
            Tuple[MedicalVolume, MedicalVolume]: Fitted parameters (``popt``) and
                goodness of fit (``r2``) values (same as `CurveFitter.fit`). The last
                axis of ``popt`` holds the polynomial coefficients, highest power
                first: ``y = popt[..., 0] * x ** self.deg + ... + popt[..., self.deg]``.

        Raises:
            RuntimeError: If any element of ``y`` is on a different device than ``x``.
        """
        device = get_device(x)
        # Collect every device up front so the error message can list all of them.
        y_devices = list(map(get_device, y))
        for y_device in y_devices:
            if y_device != device:
                raise RuntimeError(
                    f"All elements in `y` must be on the same device as `x` ({device}). "
                    f"Got {y_devices}.")

        return super().fit(x, y, mask=mask, copy_headers=copy_headers)
Beispiel #2
0
    def test_to_device(self):
        """Moving an array or a ``MedicalVolume`` to device ``-1`` lands it on the CPU."""
        arr = np.ones((3, 3, 3))
        mv = MedicalVolume(arr, affine=np.eye(4))

        # The same expectation holds for the raw array and the wrapped volume.
        for source in (arr, mv):
            moved = to_device(source, -1)
            assert get_device(moved) == cpu_device
Beispiel #3
0
    def fit(
        self,
        x,
        y: Sequence[MedicalVolume],
        mask=None,
        p0=np._NoValue,
        copy_headers: bool = True,
    ):
        """Fit the model to the data using non-linear least squares.

        Only CPU data is accepted. The mask (if any) and the initial guess are
        normalized here and then handed to the parent implementation.

        Args:
            x (array-like): 1D array of independent variables corresponding to different ``y``.
            y (list[MedicalVolumes]): Dependent variable (in order) corresponding to values of
                independent variable ``x``. Data must be spatially aligned.
            mask (``MedicalVolume`` or ``ndarray``, optional): If specified, only entries where
                ``mask > 0`` will be fit.
            p0 (Sequence, optional): Initial guess for the parameters.
                When not passed, ``self.p0`` is used.
            copy_headers (bool, optional): If ``True``, headers will be deep copied.
                If ``False``, headers will not be copied and the returned values
                will not have headers.

        Returns:
            Tuple[MedicalVolume, MedicalVolume]: Tuple of fitted parameters (``popt``)
            and goodness of fit (``r2``) values. Last axis of fitted parameters
            corresponds to different parameters in order of appearance in ``self.func``.

        Raises:
            RuntimeError: If ``x`` or any element of ``y`` is not on the CPU.
        """
        if get_device(x) != cpu_device:
            raise RuntimeError("`x` must be on the CPU")
        for volume in y:
            if get_device(volume) != cpu_device:
                raise RuntimeError("All elements in `y` must be on the CPU")

        if mask is not None:
            # The first volume serves as the reference when processing the mask.
            mask = self._process_mask(mask, y[0])

        # ``np._NoValue`` is the "argument not supplied" sentinel, so an explicit
        # ``p0=None`` from the caller is still forwarded as-is.
        initial_guess = self.p0 if p0 is np._NoValue else p0

        if mask is None:
            flat_mask = None
        else:
            flat_mask = mask.A.reshape(-1)

        p0 = self._format_p0(initial_guess, ref=y[0], flatten=True, mask=flat_mask)

        return super().fit(x, y, mask=mask, p0=p0, copy_headers=copy_headers)
Beispiel #4
0
    def volume(self, value):
        """Replace the stored volume array.

        The new array must have the same number of dimensions as the current
        one. If its shape differs from the current shape, the stored headers
        are discarded (set to ``None``) because they no longer describe the
        data. The cached device is refreshed from the newly stored array.

        Raises:
            ValueError: If ``value.ndim`` differs from the current volume's ``ndim``.
        """
        current = self._volume
        if value.ndim != current.ndim:
            raise ValueError("New volume must be same as current volume")

        shape_changed = current.shape != value.shape
        if shape_changed:
            # Headers are tied to the previous shape; drop them.
            self._headers = None

        self._volume = value
        self._device = get_device(self._volume)
Beispiel #5
0
 def device(self) -> Device:
     """Return the device that the underlying volume array resides on.

     The device is looked up from ``self._volume`` on each access.
     """
     volume = self._volume
     return get_device(volume)
Beispiel #6
0
def polyfit(
    x,
    y,
    deg: int,
    rcond=None,
    full=False,
    w=None,
    cov=False,
    eps=1e-8,
    y_bounds=None,
    show_pbar=False,
    num_workers=None,
    chunksize: int = None,
):
    """Use linear least squares to fit a polynomial of degree ``deg`` to data.

    This function is a wrapper around the :func:`numpy.polyfit` and :func:`cupy.polyfit`
    functions.

    In addition to standard polyfit functionality, this function also supports
    multiprocessing of the data using multiple workers. When multiprocessing is enabled,
    each data sequence is fit using a separate worker.

    In most cases, multiprocessing is not needed for linear least squares as most
    computations can be done quickly via matrix decomposition. However, there are cases
    where ``y`` values can cause low condition numbers and/or invalid outputs. In these cases,
    solving for each data sequence separately can be beneficial.

    Solving each data sequence separately without multiprocessing can also be done by setting
    ``num_workers=0``. This can be useful when certain data sequences are ill conditioned.

    Args:
        x (ndarray): The independent variable(s) where the data is measured.
            Should usually be an M-length sequence or an (k,M)-shaped array for functions
            with k predictors, but can actually be any object.
        y (ndarray): The dependent data, a length M array - nominally func(xdata, ...) - or
            an (M,N)-shaped array for N different sequences.
        deg (int): Degree of the fitting polynomial. Same as :func:`numpy.polyfit`.
        rcond (float, optional): Same as :func:`numpy.polyfit`.
        full (bool, optional): Same as :func:`numpy.polyfit`.
        w (array-like, optional): Same as :func:`numpy.polyfit`.
        cov (bool, optional): Same as :func:`numpy.polyfit`.
        eps (float, optional): Epsilon for computing r-squared.
        y_bounds (tuple, optional): Same as :func:`curve_fit`.
        show_pbar (bool, optional): Same as :func:`curve_fit`.
        num_workers (int, optional): Maximum number of workers to use for fitting.
            If ``None``, all data sequences should be solved as a single least squares problem.
            If ``0``, each data sequence will be fit separately from one another.
            Defaults to ``None``.
        chunksize (int, optional): Same as :func:`curve_fit`.
            Only used when ``num_workers`` is not ``None``.

    Returns:
        tuple: ``(popts, r_squared)`` by default, where ``popts`` has one row per
        data sequence and ``r_squared`` holds the matching goodness-of-fit values.
        If ``full=True``, the tuple is
        ``(popts, r_squared, residuals, rank, singular_values, rcond)`` with the
        trailing values taken from the underlying ``polyfit`` call. If ``cov=True``
        (and ``full`` is false), the tuple is ``(popts, r_squared, V)``.

    Raises:
        ValueError: If ``x`` and ``y`` are on different devices, or if ``cov`` or
            ``full`` is requested together with ``num_workers`` not ``None``.
    """
    def _compute_r2_matrix(_x, _y, _popts):
        """Compute r-squared for every sequence from the jointly fitted coefficients.

        Here, ``M`` refers to # sample points, ``K`` refers to # sequences.
        This function needs to be run under the correct device context.

        Args:
            _x: array_like, shape (M,)
            _y: array_like, shape (M, K)
            _popts (array-like): Shape (deg+1, K)
        """
        xp = get_array_module(_y)

        _x = _x.flatten()
        # Columns are x**deg, ..., x**0, matching the coefficient order in ``_popts``.
        _xs = xp.stack([_x**i for i in range(len(_popts) - 1, -1, -1)],
                       axis=-1)
        yhat = _xs @ _popts  # (M, K)

        residuals = yhat - _y
        ss_res = xp.sum(residuals**2, axis=0)
        ss_tot = xp.sum((_y - xp.mean(_y, axis=0, keepdims=True))**2, axis=0)
        # ``eps`` guards against division by zero for constant sequences.
        return 1 - (ss_res / (ss_tot + eps))

    x_device, y_device = get_device(x), get_device(y)
    if x_device != y_device:
        raise ValueError(
            f"`x` ({x_device}) and `y` ({y_device}) must be on the same device"
        )

    scatter_data = num_workers is not None
    if (cov or full) and scatter_data:
        raise ValueError("`cov` or `full` cannot be used with multiprocessing")

    xp = get_array_module(x)

    x = xp.asarray(x)
    y = xp.asarray(y)
    if y.ndim == 1:
        # Treat a single sequence as an (M, 1) matrix so all paths see 2D data.
        y = y.reshape(y.shape + (1, ))
    N = y.shape[-1]

    # Never request more workers than there are sequences to fit.
    num_workers = min(num_workers, N) if num_workers is not None else None

    oob = y_bounds is not None and ((y < y_bounds[0]).any() or
                                    (y > y_bounds[1]).any())
    if oob:
        warnings.warn(
            "Out of bounds values found. Failure in fit will result in np.nan")

    fitter = partial(_polyfit,
                     x=x,
                     deg=deg,
                     y_bounds=y_bounds,
                     rcond=rcond,
                     w=w,
                     eps=eps,
                     xp=xp.__name__)

    # Extra outputs that are only populated when ``full`` or ``cov`` is set.
    # NOTE: ``rcond`` is intentionally not reset here so the caller-supplied
    # value reaches ``xp.polyfit`` below.
    residuals, rank, singular_values, V = None, None, None, None
    with x_device:
        if not scatter_data:
            # Fit all sequences as one least squares problem.
            out = xp.polyfit(x, y, deg, rcond=rcond, full=full, w=w, cov=cov)
            if full:
                popts, residuals, rank, singular_values, rcond = out
            elif cov:
                popts, V = out
            else:
                popts = out
            r_squared = _compute_r2_matrix(x, y, popts)
            # Transpose so that rows index sequences, like the per-sequence paths.
            popts = popts.T
        elif num_workers == 0:
            # Fit each sequence separately
            popts = []
            r_squared = []
            for i in tqdm(range(N), disable=not show_pbar):
                popt_, r2_ = fitter(y[:, i])
                popts.append(popt_)
                r_squared.append(r2_)
            popts = xp.stack(popts, axis=0)
        else:
            # Multiprocessing - fits each sequence separately
            if show_pbar:
                # Only forward ``chunksize`` when given so ``process_map`` keeps its default.
                tqdm_kwargs = {"chunksize": chunksize}
                tqdm_kwargs = {
                    k: v
                    for k, v in tqdm_kwargs.items() if v is not None
                }
                data = process_map(fitter,
                                   y.T,
                                   max_workers=num_workers,
                                   **tqdm_kwargs)
            else:
                with mp.Pool(num_workers) as p:
                    data = p.map(fitter, y.T, chunksize=chunksize)
            popts = [result[0] for result in data]
            r_squared = [result[1] for result in data]
            popts = xp.stack(popts, axis=0)

        r_squared = xp.asarray(r_squared)

    if full:
        return popts, r_squared, residuals, rank, singular_values, rcond
    elif cov:
        return popts, r_squared, V
    else:
        return popts, r_squared
Beispiel #7
0
def curve_fit(
    func,
    x,
    y,
    y_bounds=None,
    p0=None,
    maxfev=100,
    ftol=1e-5,
    eps=1e-8,
    show_pbar=False,
    num_workers=0,
    chunksize: int = None,
    **kwargs,
):
    """Fit ``func`` to one or more data sequences with non-linear least squares.

    Built on top of :func:`scipy.optimize.curve_fit`. Each column of ``y`` is
    fit independently, either sequentially or across worker processes.

    Args:
        func (callable): The model function, f(x, ...). It must take the independent variable
            as the first argument and the parameters to fit as separate remaining arguments.
        x (ndarray): The independent variable(s) where the data is measured.
            Should usually be an M-length sequence or an (k,M)-shaped array for functions
            with k predictors, but can actually be any object.
        y (ndarray): The dependent data, a length M array - nominally func(xdata, ...) - or
            an (M,N)-shaped array for N different sequences.
        y_bounds (tuple, optional): Lower and upper bound on y values. Defaults to no bounds.
            Sequences with observations out of this range will not be processed.
        p0 (Number | Sequence[Number] | ndarray | Dict, optional): Initial guess for the
            parameters. If sequence (e.g. list, tuple, 1d ndarray), it should have length P,
            which is the number of parameters. If this is a 2D numpy array, it should have
            a shape ``(N, P)``. If ``None``, then the initial values will all be 1.
        maxfev (int, optional): Maximum number of function evaluations before the termination.
            If `bounds` argument for `scipy.optimize.curve_fit` is specified, this corresponds
            to the `max_nfev` in the least squares algorithm
        ftol (float): Tolerance for termination by the change of the cost function.
            See `scipy.optimize.least_squares` for more details.
        eps (float, optional): Epsilon for computing r-squared.
        show_pbar (bool, optional): If `True`, show progress bar. Note this can increase runtime
            slightly when using multiple workers.
        num_workers (int, optional): Maximum number of workers to use for fitting.
        chunksize (int, optional): Size of chunks sent to worker processes when
            ``num_workers > 0``. When ``show_pbar=True``, this defaults to the standard
            value in :func:`tqdm.concurrent.process_map`.
        kwargs: Keyword args for `scipy.optimize.curve_fit`.

    Returns:
        Tuple[ndarray, ndarray]:

            popts (ndarray): A NxP matrix of fitted values. The last dimension (``axis=-1``)
            corresponds to the different parameters (in order).

            rsquared (ndarray): A (N,) length matrix of r-squared goodness-of-fit values.

    Raises:
        RuntimeError: If ``x`` or ``y`` is not on the CPU.
    """
    for data_arg in (x, y):
        if get_device(data_arg) != cpu_device:
            raise RuntimeError("`x` and `y` must be on CPU")

    x = np.asarray(x)
    y = np.asarray(y)
    if y.ndim == 1:
        # Promote a single sequence to an (M, 1) matrix.
        y = y[:, np.newaxis]
    N = y.shape[-1]

    # The leading argument(s) of ``func`` are ``x`` (and ``self`` for methods);
    # everything after that is a parameter to fit.
    func_args = list(inspect.signature(func).parameters)
    num_leading = 2 if "self" in func_args else 1
    nparams = len(func_args) - num_leading
    param_args = func_args[num_leading:]

    p0_scalars, p0_seq = (None, None) if p0 is None else _format_p0(p0, param_args, N)

    # scipy names the evaluation cap differently depending on whether bounds are used.
    if "bounds" in kwargs:
        kwargs.setdefault("max_nfev", maxfev)
    else:
        kwargs["maxfev"] = maxfev

    if y_bounds is not None:
        if (y < y_bounds[0]).any() or (y > y_bounds[1]).any():
            warnings.warn(
                "Out of bounds values found. Failure in fit will result in np.nan")

    num_workers = min(num_workers, N)
    fitter = partial(
        _curve_fit,
        x=x,
        func=func,
        y_bounds=y_bounds,
        p0=p0_scalars,
        ftol=ftol,
        eps=eps,
        nparams=nparams,
        **kwargs,
    )

    # One entry per sequence; bundle per-sequence initial guesses when present.
    y_T = y.T
    if p0_seq:
        y_T = [
            {"y": sequence, "p0": {name: guesses[idx] for name, guesses in p0_seq.items()}}
            for idx, sequence in enumerate(y_T)
        ]

    if not num_workers:
        # Sequential fitting in the current process.
        popts, r_squared = [], []
        for idx in tqdm(range(N), disable=not show_pbar):
            fitted, r2 = fitter(y_T[idx])
            popts.append(fitted)
            r_squared.append(r2)
    else:
        if show_pbar:
            # Only forward ``chunksize`` when given so ``process_map`` keeps its default.
            tqdm_kwargs = {} if chunksize is None else {"chunksize": chunksize}
            data = process_map(fitter, y_T, max_workers=num_workers, **tqdm_kwargs)
        else:
            with mp.Pool(num_workers) as pool:
                data = pool.map(fitter, y_T, chunksize=chunksize)
        popts = [result[0] for result in data]
        r_squared = [result[1] for result in data]

    return np.stack(popts, axis=0), np.asarray(r_squared)