Example #1
0
    def test_baseline(self):
        """Check the shapes and values returned by ``rampy.baseline``.

        A noisy Gaussian peak is placed on a linear background and the
        background is recovered with the 'poly', 'unispline', 'als', 'arPLS'
        and 'exp' methods. The 'gcvspline' and 'log' cases are not exercised.
        The noise comes from the unseeded global NumPy generator.
        """
        x_axis = np.arange(1, 100, 0.5)

        # Reference backgrounds.
        linear_bkg = 0.001 * x_axis
        exp_bkg = rampy.funexp(x_axis, 0.1, 0.05, 50.)
        log_bkg = rampy.funlog(x_axis, 1., 1., 1., 1.)

        # Gaussian peak centred on 50 plus random noise.
        peak = 1.0 * np.exp(-np.log(2) * ((x_axis - 50.0) / 10.0)**2)
        signal = peak + 0.05 * np.random.randn(len(x_axis))

        observed = linear_bkg + signal
        # NOTE(review): the two spectra below are built but never fitted;
        # every call, including the 'exp' one, uses `observed`.
        observed_exp = exp_bkg + signal
        observed_log = log_bkg + signal

        # Regions used by the methods that fit the baseline on a sub-range.
        regions = np.array([[1, 20], [80, 100]])

        def fit(method, **options):
            return rampy.baseline(x_axis, observed, regions, method, **options)

        # Each entry is the (corrected signal, baseline) pair of one method.
        results = [
            fit('poly', polynomial_order=1),
            fit('unispline', s=1e0),
            fit('als', lam=10**7, p=0.05),
            fit('arPLS', lam=10**7, ratio=0.1),
            fit('exp', p0_exp=[0.1, 0.1, 45]),
        ]

        # Corrected signal and baseline must share the same shape.
        for corrected, fitted in results:
            np.testing.assert_equal(corrected.shape, fitted.shape)

        # Fitted baselines against the reference backgrounds (0 decimals).
        references = [linear_bkg] * 4 + [exp_bkg]
        for reference, (_, fitted) in zip(references, results):
            np.testing.assert_almost_equal(reference, fitted[:, 0], 0)

        # Corrected signals against the background-free signal; only the
        # polynomial case is checked to 1 decimal, the others to 0.
        decimals = [1, 0, 0, 0, 0]
        for decimal, (corrected, _) in zip(decimals, results):
            np.testing.assert_almost_equal(signal, corrected[:, 0], decimal)
Example #2
0
def baseline(x_input, y_input, bir, method, **kwargs):
    """Fit a baseline under an x-y spectrum and subtract it.

    Both axes are standardised with scikit-learn's ``StandardScaler`` before
    the fit (this helps the polynomial fit). The fitted baseline is converted
    back to the scale of ``y_input`` before being returned. Because the fit is
    done on standardised data, smoothing factors and starting parameters
    passed through ``kwargs`` apply to the standardised values.

    Parameters
    ----------
    x_input : ndarray
        x values.
    y_input : ndarray
        y values.
    bir : ndarray
        Regions used to fit the baseline, organised per line.
        For instance, ``np.array([[100., 200.], [500., 600.]])`` defines one
        region between 100 and 200 and another between 500 and 600.
        Note: the "rubberband", "als" and "arPLS" algorithms do not fit on
        these regions, but the argument is still required.
    method : str
        "poly": polynomial fit of degree ``polynomial_order``;
        "unispline": spline with the UnivariateSpline function of Scipy,
                     ``s`` is the spline smoothing factor;
        "gcvspline": spline with the gcvspl.f algorithm; the errors passed to
                     it are sqrt(abs(y)) of the standardised signal.
                     Requires the gcvspline package
                     ("pip install gcvspline");
        "gaussian": background fitted with ``rampy.gaussian``;
        "exp": exponential background fitted with ``rampy.funexp``;
        "log": logarithmic background fitted with ``rampy.funlog``;
        "rubberband": lower convex hull of the spectrum, linearly
                      interpolated; relies on point indices, so x is
                      presumably expected in ascending order;
        "als": asymmetric least squares following Eilers and Boelens 2005;
        "arPLS": algorithm from Baek et al. 2015, Baseline correction using
                 asymmetrically reweighted penalized least squares smoothing,
                 Analyst 140: 250-257.

    kwargs
    ------
    polynomial_order : Int
        The degree of the polynomial (0 for a constant), default = 1.
    s : Float
        spline smoothing coefficient for the unispline and gcvspline
        algorithms, default = 2.0.
    lam : Float
        lambda smoothness parameter for the ALS and arPLS algorithms.
        Typical values are between 10**2 to 10**9, default = 10**5.
    p : Float
        for the ALS algorithm, advised value between 0.001 to 0.1,
        default = 0.01.
    ratio : Float
        ratio parameter of the arPLS algorithm, default = 0.01.
    niter : Int
        number of iterations of the ALS algorithm (at least 1), default = 10.
    p0_gaussian : List
        starting parameters for the gaussian baseline fit with curve_fit.
        Default = [1.,1.,1.].
    p0_exp : List
        starting parameters for the exp baseline fit with curve_fit.
        Default = [1.,1.,1.].
    p0_log : List
        starting parameters for the log baseline fit with curve_fit.
        Default = [1.,1.,1.,1.].

    Returns
    -------
    out1 : ndarray
        The corrected signal, as a column of shape (n, 1).
    out2 : ndarray
        The baseline, as a column of shape (n, 1).

    Raises
    ------
    ImportError
        If ``method`` is "gcvspline" and the gcvspline package is missing.
    ValueError
        If ``method`` is not one of the supported names, or if ``niter`` is
        lower than 1 for the "als" method.
    """
    # Signal inside the fitting regions: column 0 holds x, column 1 holds y.
    yafit_unscaled = get_portion_interest(x_input, y_input, bir)

    # Standardisation with sklearn; this helps for polynomial fitting.
    X_scaler = preprocessing.StandardScaler().fit(x_input.reshape(-1, 1))
    Y_scaler = preprocessing.StandardScaler().fit(y_input.reshape(-1, 1))

    x = X_scaler.transform(x_input.reshape(-1, 1))
    y = Y_scaler.transform(y_input.reshape(-1, 1))

    yafit = np.copy(yafit_unscaled)
    yafit[:, 0] = X_scaler.transform(yafit_unscaled[:, 0].reshape(-1, 1))[:, 0]
    yafit[:, 1] = Y_scaler.transform(yafit_unscaled[:, 1].reshape(-1, 1))[:, 0]

    # x stays a (n, 1) column, y is flattened to (n,).
    y = y.reshape(len(y_input))

    if method == 'poly':

        poly_order = kwargs.get('polynomial_order', 1)

        coeffs = np.polyfit(yafit[:, 0], yafit[:, 1], poly_order)

        baseline_fitted = np.polyval(coeffs, x)

    elif method == 'unispline':

        splinesmooth = kwargs.get('s', 2.0)

        spline = UnivariateSpline(yafit[:, 0], yafit[:, 1], s=splinesmooth)

        baseline_fitted = spline(x)

    elif method == 'gcvspline':

        try:
            from gcvspline import gcvspline, splderivative
        except ImportError:
            # Fail here with a clear message instead of a NameError below.
            raise ImportError(
                'ERROR: Install gcvspline to use this mode (needs a working FORTRAN compiler).'
            )

        splinesmooth = kwargs.get('s', 2.0)

        # Spline baseline with mode 1 of gcvspl.f, see gcvspline documentation
        c, wk, ier = gcvspline(
            yafit[:, 0],
            yafit[:, 1],
            np.sqrt(np.abs(yafit[:, 1])),
            splinesmooth,
            splmode=1)  # gcvspl with mode 1 and smooth factor

        baseline_fitted = splderivative(x, yafit[:, 0], c)

    elif method == 'gaussian':
        # Baseline is of the type y = a*exp(-log(2)*((x-b)/c)**2)
        p0_gauss = kwargs.get('p0_gaussian', [1., 1., 1.])

        coeffs, pcov = curve_fit(rampy.gaussian,
                                 yafit[:, 0],
                                 yafit[:, 1],
                                 p0=p0_gauss)

        baseline_fitted = rampy.gaussian(x, coeffs[0], coeffs[1], coeffs[2])

    elif method == 'exp':
        # Baseline is of the type y = a*exp(b*(x-xo))
        p0_exp = kwargs.get('p0_exp', [1., 1., 1.])

        coeffs, pcov = curve_fit(rampy.funexp,
                                 yafit[:, 0],
                                 yafit[:, 1],
                                 p0=p0_exp)

        baseline_fitted = rampy.funexp(x, coeffs[0], coeffs[1], coeffs[2])

    elif method == 'log':
        # Four-parameter baseline; see rampy.funlog for its functional form.
        p0_log = kwargs.get('p0_log', [1., 1., 1., 1.])

        coeffs, pcov = curve_fit(rampy.funlog,
                                 yafit[:, 0],
                                 yafit[:, 1],
                                 p0=p0_log)

        baseline_fitted = rampy.funlog(x, coeffs[0], coeffs[1], coeffs[2],
                                       coeffs[3])

    elif method == 'rubberband':
        # code from this stack-exchange forum
        #https://dsp.stackexchange.com/questions/2725/how-to-perform-a-rubberband-correction-on-spectroscopic-data

        # ConvexHull expects one point per row and np.interp expects 1-D
        # arrays, so work on a flattened copy of the x column.
        x_flat = x.ravel()
        v = ConvexHull(np.column_stack((x_flat, y))).vertices

        # Rotate convex hull vertices until they start from the lowest index
        v = np.roll(v, -v.argmin())
        # Keep the run from the first point up to and including the highest
        # index, so the baseline is anchored at both ends of the spectrum.
        v = v[:v.argmax() + 1]

        # Create baseline using linear interpolation between vertices
        baseline_fitted = np.interp(x_flat, x_flat[v], y[v])

    elif method == 'als':
        # Matlab code in Eilers et Boelens 2005
        # Python adaptation found on stackoverflow: https://stackoverflow.com/questions/29156532/python-baseline-correction-library

        lam = kwargs.get('lam', 1.0 * 10**5)
        p = kwargs.get('p', 0.01)
        niter = kwargs.get('niter', 10)
        if niter < 1:
            raise ValueError("niter must be at least 1 for the 'als' method.")

        L = len(y)
        # Second-order difference matrix, built sparse; same entries as
        # np.diff(np.eye(L), 2) without allocating a dense L x L array.
        D = sparse.diags([1.0, -2.0, 1.0], [0, -1, -2],
                         shape=(L, L - 2), format='csc')
        # The smoothness penalty does not change between iterations.
        penalty = lam * D.dot(D.transpose())
        w = np.ones(L)
        for _ in range(niter):
            W = sparse.spdiags(w, 0, L, L)
            Z = W + penalty
            z = sparse.linalg.spsolve(Z, w * y)
            # Points above the baseline get weight p, points below 1 - p.
            w = p * (y > z) + (1 - p) * (y < z)

        baseline_fitted = z

    elif method == 'arPLS':
        # Adaptation of the Matlab code in Baek et al 2015

        lam = kwargs.get('lam', 1.0 * 10**5)
        ratio = kwargs.get('ratio', 0.01)

        N = len(y)
        # Same sparse second-order difference matrix as for ALS.
        D = sparse.diags([1.0, -2.0, 1.0], [0, -1, -2],
                         shape=(N, N - 2), format='csc')
        penalty = lam * D.dot(D.transpose())
        w = np.ones(N)

        while True:
            W = sparse.spdiags(w, 0, N, N)
            Z = W + penalty
            z = sparse.linalg.spsolve(Z, w * y)
            d = y - z
            # make d- and get w^t with m and s
            dn = d[d < 0]
            if dn.size == 0:
                # No point lies below the baseline: mean and std of an empty
                # array are NaN, which would make the weights NaN and the
                # exit test below never succeed. Keep the current baseline.
                break
            m = np.mean(dn)
            s = np.std(dn)
            wt = 1.0 / (1 + np.exp(2 * (d - (2 * s - m)) / s))
            # check exit condition and backup
            if norm(w - wt) / norm(w) < ratio:
                break
            w = wt

        baseline_fitted = z

    else:
        raise ValueError(
            "Unknown baseline method '%s'; choose among 'poly', 'unispline', "
            "'gcvspline', 'gaussian', 'exp', 'log', 'rubberband', 'als' and "
            "'arPLS'." % method)

    # Back to the original y scale, as a column vector.
    baseline_out = Y_scaler.inverse_transform(
        np.asarray(baseline_fitted).reshape(-1, 1))

    return y_input.reshape(-1, 1) - baseline_out, baseline_out
Example #3
0
def baseline(x_input,y_input,bir,method, **kwargs):
    """Allows subtracting a baseline under a x y spectrum.

    The x and y data are standardised with ``StandardScaler`` from sklearn
    before fitting, and the baseline is transformed back to the scale of
    ``y_input`` at the end. Smoothing factors and curve_fit starting
    parameters therefore act on the standardised data.

    Parameters
    ----------
    x_input : ndarray
        x values.
    y_input : ndarray
        y values.
    bir : ndarray
        Contain the regions of interest, organised per line.
        For instance, roi = np.array([[100., 200.],[500.,600.]]) will
        define roi between 100 and 200 as well as between 500 and 600.
        Note: This is NOT used for the fit by the "rubberband", "als" and
        "arPLS" algorithms, but still is a requirement when calling the
        function.
    method : str
        "poly": polynomial fitting, polynomial_order is the degree of the polynomial;
        "unispline": spline with the UnivariateSpline function of Scipy, s is
                     the spline smoothing factor;
        "gcvspline": spline with the gcvspl.f algorithm, s is the smoothing factor;
                     the errors given to gcvspline are sqrt(abs(y)) of the standardised signal.
                     Requires the installation of gcvspline with a "pip install gcvspline" call prior to use;
        "gaussian": gaussian background, fitted with rampy.gaussian;
        "exp": exponential background, fitted with rampy.funexp;
        "log": logarithmic background, fitted with rampy.funlog;
        "rubberband": rubberband baseline fitting (lower convex hull); it works
                      on point indices, so x is presumably expected in ascending order;
        "als": asymmetric least square fitting following Eilers and Boelens 2005;
        "arPLS": automatic baseline fit using the algorithm from Baek et al. 2015
                 Baseline correction using asymmetrically reweighted penalized least squares smoothing, Analyst 140: 250-257.

    kwargs
    ------
    polynomial_order : Int
        The degree of the polynomial (0 for a constant), default = 1.
    s : Float
        spline smoothing coefficient for the unispline and gcvspline algorithms, default = 2.0.
    lam : Float
        the lambda smoothness parameter for the ALS and ArPLS algorithms. Typical values are between 10**2 to 10**9, default = 10**5.
    p : Float
        for the ALS algorithm, advised value between 0.001 to 0.1, default = 0.01.
    ratio : Float
        ratio parameter of the arPLS algorithm. default = 0.01.
    niter : Int
        number of iterations of the ALS algorithm, must be >= 1, default = 10.
    p0_gaussian : List
        containing the starting parameters for the gaussian baseline fit with curve_fit. Default = [1.,1.,1.].
    p0_exp : List
        containing the starting parameters for the exp baseline fit with curve_fit. Default = [1.,1.,1.].
    p0_log : List
        containing the starting parameters for the log baseline fit with curve_fit. Default = [1.,1.,1.,1.].

    Returns
    -------
    out1 : ndarray
        Contain the corrected signal, shape (n, 1).
    out2 : ndarray
        Contain the baseline, shape (n, 1).

    Raises
    ------
    ImportError
        When the "gcvspline" method is requested but gcvspline is not installed.
    ValueError
        When method is not a supported name, or niter < 1 with "als".

    """
    # we get the signals in the bir (column 0 is x, column 1 is y)
    yafit_unscaled = get_portion_interest(x_input,y_input,bir)

    # signal standardization with sklearn
    # this helps for polynomial fitting
    X_scaler = preprocessing.StandardScaler().fit(x_input.reshape(-1, 1))
    Y_scaler = preprocessing.StandardScaler().fit(y_input.reshape(-1, 1))

    # transformation
    x = X_scaler.transform(x_input.reshape(-1, 1))
    y = Y_scaler.transform(y_input.reshape(-1, 1))

    yafit = np.copy(yafit_unscaled)
    yafit[:,0] = X_scaler.transform(yafit_unscaled[:,0].reshape(-1, 1))[:,0]
    yafit[:,1] = Y_scaler.transform(yafit_unscaled[:,1].reshape(-1, 1))[:,0]

    # y becomes 1-D; x is kept as a (n, 1) column
    y = y.reshape(len(y_input))

    if method == 'poly':

        # optional parameters
        poly_order = kwargs.get('polynomial_order',1)

        coeffs = np.polyfit(yafit[:,0],yafit[:,1],poly_order)

        baseline_fitted = np.polyval(coeffs,x)

    elif method == 'unispline':

        # optional parameters
        splinesmooth = kwargs.get('s',2.0)

        # fit of the baseline
        coeffs = UnivariateSpline(yafit[:,0],yafit[:,1], s=splinesmooth)

        baseline_fitted = coeffs(x)

    elif method == 'gcvspline':

        try:
            from gcvspline import gcvspline, splderivative
        except ImportError:
            # stop here: continuing would only fail later with a NameError
            raise ImportError('ERROR: Install gcvspline to use this mode (needs a working FORTRAN compiler).')

        # optional parameters
        splinesmooth = kwargs.get('s',2.0)

        # Spline baseline with mode 1 of gcvspl.f, see gcvspline documentation
        c, wk, ier = gcvspline(yafit[:,0],yafit[:,1],np.sqrt(np.abs(yafit[:,1])),splinesmooth,splmode = 1) # gcvspl with mode 1 and smooth factor

        baseline_fitted = splderivative(x,yafit[:,0],c)

    elif method == 'gaussian':
        ### Baseline is of the type y = a*exp(-log(2)*((x-b)/c)**2)
        # optional parameters
        p0_gauss = kwargs.get('p0_gaussian',[1.,1.,1.])
        ## fit of the baseline
        coeffs, pcov = curve_fit(rampy.gaussian,yafit[:,0],yafit[:,1],p0 = p0_gauss)

        baseline_fitted = rampy.gaussian(x,coeffs[0],coeffs[1],coeffs[2])

    elif method == 'exp':
        ### Baseline is of the type y = a*exp(b*(x-xo))
        # optional parameters
        p0_exp = kwargs.get('p0_exp',[1.,1.,1.])
        ## fit of the baseline
        coeffs, pcov = curve_fit(rampy.funexp,yafit[:,0],yafit[:,1],p0 = p0_exp)

        baseline_fitted = rampy.funexp(x,coeffs[0],coeffs[1],coeffs[2])

    elif method == 'log':
        ### Four-parameter baseline, see rampy.funlog for the functional form
        # optional parameters
        p0_log = kwargs.get('p0_log',[1.,1.,1.,1.])
        ## fit of the baseline
        coeffs, pcov = curve_fit(rampy.funlog,yafit[:,0],yafit[:,1],p0 = p0_log)

        baseline_fitted = rampy.funlog(x,coeffs[0],coeffs[1],coeffs[2],coeffs[3])

    elif method == 'rubberband':
        # code from this stack-exchange forum
        #https://dsp.stackexchange.com/questions/2725/how-to-perform-a-rubberband-correction-on-spectroscopic-data

        # ConvexHull wants an (npoints, 2) array and np.interp wants 1-D
        # inputs, hence the flattened x
        x_1d = x.ravel()

        # Find the convex hull
        v = ConvexHull(np.column_stack((x_1d, y))).vertices

        # Rotate convex hull vertices until they start from the lowest one
        v = np.roll(v, -v.argmin())
        # Leave only the part running up to the highest index, which is
        # included so that the last point of the spectrum anchors the baseline
        v = v[:v.argmax()+1]

        # Create baseline using linear interpolation between vertices
        baseline_fitted = np.interp(x_1d, x_1d[v], y[v])

    elif method == 'als':
        # Matlab code in Eilers et Boelens 2005
        # Python adaptation found on stackoverflow: https://stackoverflow.com/questions/29156532/python-baseline-correction-library

        # optional parameters
        lam = kwargs.get('lam',1.0*10**5)
        p = kwargs.get('p',0.01)
        niter = kwargs.get('niter',10)
        if niter < 1:
            raise ValueError("niter must be at least 1 for the 'als' method.")

        # starting the algorithm
        L = len(y)
        # sparse second-order difference matrix, same entries as
        # np.diff(np.eye(L), 2) without the dense L x L intermediate
        D = sparse.diags([1.0, -2.0, 1.0], [0, -1, -2], shape=(L, L-2), format='csc')
        # the penalty term is constant over the iterations
        penalty = lam * D.dot(D.transpose())
        w = np.ones(L)
        for _ in range(niter):
            W = sparse.spdiags(w, 0, L, L)
            Z = W + penalty
            z = sparse.linalg.spsolve(Z, w*y)
            # weight p above the baseline, 1-p below it
            w = p * (y > z) + (1-p) * (y < z)

        baseline_fitted = z

    elif method == 'arPLS':
        # Adaptation of the Matlab code in Baek et al 2015

        # optional parameters
        lam = kwargs.get('lam',1.0*10**5)
        ratio = kwargs.get('ratio',0.01)

        N = len(y)
        # sparse second-order difference matrix and constant penalty term
        D = sparse.diags([1.0, -2.0, 1.0], [0, -1, -2], shape=(N, N-2), format='csc')
        penalty = lam * D.dot(D.transpose())
        w = np.ones(N)

        while True:
            W = sparse.spdiags(w, 0, N, N)
            Z = W + penalty
            z = sparse.linalg.spsolve(Z, w*y)
            d = y - z
            # make d- and get w^t with m and s
            dn = d[d<0]
            if dn.size == 0:
                # nothing lies below the baseline: m and s would be NaN,
                # the weights too, and the exit test could never pass
                break
            m = np.mean(dn)
            s = np.std(dn)
            wt = 1.0/(1 + np.exp( 2* (d-(2*s-m))/s ) )
            # check exit condition and backup
            if norm(w-wt)/norm(w) < ratio:
                break
            w = wt

        baseline_fitted = z

    else:
        raise ValueError("Unknown baseline method '%s'; choose among 'poly', 'unispline', 'gcvspline', 'gaussian', 'exp', 'log', 'rubberband', 'als' and 'arPLS'." % method)

    # back-transform once and reuse the result for both outputs
    baseline_rescaled = Y_scaler.inverse_transform(np.asarray(baseline_fitted).reshape(-1, 1))

    return y_input.reshape(-1,1)-baseline_rescaled, baseline_rescaled
    
Example #4
0
    def test_baseline(self):
        """Check the shapes and values returned by ``rampy.baseline``.

        A noisy Gaussian peak on a linear background is corrected with the
        'poly', 'unispline', 'als', 'arPLS', 'drPLS' and 'exp' methods; the
        'gcvspline' case is not exercised. The noise comes from the unseeded
        global NumPy generator.
        """
        x_axis = np.arange(1, 100, 0.5)

        # Reference backgrounds.
        linear_bkg = 0.001 * x_axis
        exp_bkg = rampy.funexp(x_axis, 0.1, 0.05, 50.)
        log_bkg = rampy.funlog(x_axis, 1., 1., 1., 1.)

        # Gaussian peak centred on 50 plus random noise.
        peak = 1.0 * np.exp(-np.log(2) * ((x_axis - 50.0) / 10.0)**2)
        signal = peak + 0.05 * np.random.randn(len(x_axis))

        observed = linear_bkg + signal
        observed_exp = exp_bkg + signal
        # Built but not used by any call or assertion below.
        observed_log = log_bkg + signal

        # Regions used by the methods that fit the baseline on a sub-range.
        regions = np.array([[1, 20], [80, 100]])

        def fit(method, **options):
            # Every method, including 'exp', is run on `observed`.
            return rampy.baseline(x_axis, observed, regions, method, **options)

        # Each entry is the (corrected signal, baseline) pair of one method.
        results = [
            fit('poly', polynomial_order=1),
            fit('unispline', s=1e0),
            fit('als', lam=10**7, p=0.05),
            fit('arPLS', lam=10**7, ratio=0.1),
            fit('drPLS'),
            fit('exp', p0_exp=[0.1, 0.1, 45]),
        ]

        # Corrected signal and baseline must share the same shape.
        for corrected, fitted in results:
            np.testing.assert_equal(corrected.shape, fitted.shape)

        # Fitted baselines against the reference backgrounds (0 decimals).
        expected_baselines = [linear_bkg] * 5 + [exp_bkg]
        for reference, (_, fitted) in zip(expected_baselines, results):
            np.testing.assert_almost_equal(reference, fitted[:, 0], 0)

        # Corrected signals; only the polynomial case is checked to 1 decimal.
        # NOTE(review): the 'exp' result is compared with the spectrum that
        # still contains the exponential background, not with `signal` --
        # confirm this is intended.
        expected_signals = [(signal, 1)] + [(signal, 0)] * 4 + [(observed_exp, 0)]
        for (reference, decimal), (corrected, _) in zip(expected_signals,
                                                        results):
            np.testing.assert_almost_equal(reference, corrected[:, 0], decimal)