Beispiel #1
0
    def gaussianVariances(self) -> np.ndarray:
        """Estimate parameter variances assuming gaussian errors

        The squared scale is used as the residual variance. It multiplies the inverse of hermitianTranspose(A) times A, together with the constant 1.91472, and the diagonal of that product is returned. Residuals and scale stored on the instance are used where they are set and are computed here otherwise.

        Returns
        -------
        np.ndarray
            The real part of the diagonal of the scaled inverse matrix, one value per parameter. None is returned when no parameters are available.
        """
        from resistics.regression.robust import hermitianTranspose
        from resistics.regression.moments import getScale

        # nothing can be estimated without a solution
        if self.params is None:
            return None

        # use stored residuals if present, otherwise rebuild them from the model
        residuals = self.resids
        if residuals is None:
            residuals = self.y - np.dot(self.A, self.params)

        # use the stored scale if present, otherwise estimate it from the residuals
        sigma = self.scale
        if sigma is None:
            sigma = getScale(residuals, "mad0")

        # pxp matrix built from the predictors only (weights are not included)
        gram = np.dot(hermitianTranspose(self.A), self.A)
        gramInverse = np.linalg.inv(gram)

        # scale by the residual variance and keep the real diagonal
        covariance = 1.91472 * (sigma * sigma) * gramInverse
        return np.diag(covariance).real
def test_regression_moments_mad0() -> None:
    """Check mad0: MAD with zeros removed and using a 0 location

    Both samples contain the same absolute non-zero values, so the same
    expected value applies to each. getScale is checked both with "mad0"
    given explicitly and with no method given.
    """
    from resistics.regression.moments import mad0, getScale
    import numpy as np
    import scipy.stats as stats

    expected = 3 / stats.norm.ppf(3 / 4.)
    samples = (
        [0, 1, 2, 3, 1, 3, 4, 0, 4, 5],
        [0, -1, 2, -3, 1, 3, -4, 0, 4, 5],
    )
    for sample in samples:
        data = np.array(sample)
        assert mad0(data) == expected
        assert getScale(data, "mad0") == expected
        assert getScale(data) == expected
def test_regression_moments_mad() -> None:
    """Check mad and getScale(..., "mad") against the statsmodels MAD

    A seeded sample of 40 draws from a Student t distribution with 6 degrees
    of freedom is used. The reference value is the statsmodels MAD of the
    absolute values of the sample.
    """
    from resistics.regression.moments import mad, getScale
    import numpy as np
    import scipy.stats as stats
    import statsmodels.api as sm

    np.random.seed(12345)
    sample = stats.t(6).rvs(40)
    reference = sm.robust.scale.mad(np.absolute(sample))
    # both entry points are evaluated before any comparison is made
    estimates = (mad(sample), getScale(sample, "mad"))
    for estimate in estimates:
        np.testing.assert_almost_equal(reference, estimate)
Beispiel #4
0
def chatterjeeMachlerMod(A, y, **kwargs):
    """Two stage iteratively reweighted least squares with leverage-adjusted residuals

    Solves for x where y = Ax. Rather than using leverage directly as a weight,
    residuals are divided by (1 - P)^2, where P is a normalised leverage measure
    taken from the QR decomposition of A, and M estimate weights are calculated
    from these modified residuals.

    The first stage starts from weights based purely on leverage and iterates
    with huber weights. Only its final scale estimate is carried forward. The
    second stage restarts from an ordinary least squares solution, uses
    trimmedMean weights for its first solve and then iterates with the weight
    function named in the options.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    **kwargs
        Overrides for the values returned by defaultOptions(). The options read
        here are "intercept", "maxiter" and "weights".

    Returns
    -------
    params
        The solution parameters
    resids
        The residuals. These are leverage-adjusted and standardised, except on
        the early exit for an exact fit, where the raw residuals are returned.
    weights
        The final weights
    """
    from resistics.common.math import eps
    from resistics.regression.moments import getLocation, getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.robust import defaultOptions, applyWeights, olsModel
    import numpy.linalg as linalg

    # now calculate p and n
    n = A.shape[0]
    p = A.shape[1]
    pnRatio = 1.0 * p / n

    # leverage measure: squared row norms of Q from the QR decomposition
    # computed for all rows at once rather than in a python loop over rows
    q, _ = linalg.qr(A)
    Pdiag = np.absolute(np.sum(q * np.conjugate(q), axis=1)).real.astype("float")
    del q
    # the small offset in the denominator is intended to keep the normalised values below 1
    Pdiag = Pdiag / (np.max(Pdiag) + 0.0000000001)
    locP = getLocation(Pdiag, "median")
    scaleP = getScale(Pdiag, "mad")
    # cap values far above the median so that 1 - Pdiag does not get arbitrarily close to zero
    bound = locP + 6 * scaleP
    Pdiag[Pdiag > bound] = 0.99999
    leverageMeas = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration
    # this is purely based on the leverage
    weights = np.reciprocal(np.maximum(Pdiag, pnRatio))

    # get options
    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    if options["intercept"]:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # stage one: iteratively weighted least squares with huber weights
    # NOTE(review): if maxiter is less than 1 this loop never runs and scale is
    # unbound when it is used in stage two - confirm maxiter is always >= 1
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew,
                                                                   ynew,
                                                                   rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        # NOTE(review): a zero scale is not guarded against here - confirm getScale cannot return 0
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weights = getWeights(residsNew, "huber")
        iteration = iteration + 1

        # there are no previous residuals to compare against on the first pass
        converged = False
        if iteration > 1:
            changeResids = linalg.norm(residsNew -
                                       resids) / linalg.norm(residsNew)
            converged = changeResids < eps()
        resids = residsNew
        if converged:
            break

    # stage two: restart from the least squares solution, keeping only the
    # scale estimate from stage one
    params, resids, _squareResid, _rank, _s = olsModel(A, y)
    resids = resids / leverageMeas
    resids = resids / scale
    weights = getWeights(resids, "trimmedMean")
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew,
                                                                   ynew,
                                                                   rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights

        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")

        # standardise and calculate weights
        residsNew = residsNew / scale
        weights = getWeights(residsNew, options["weights"])
        iteration = iteration + 1
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        resids = residsNew
        if changeResids < eps():
            break

    # at the end, return the components
    return params, resids, weights
Beispiel #5
0
def mestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> Dict[str, Any]:
    r"""Mestimate robust least squares

    Solves for :math:`x` where,

    .. math::        
        y = Ax .

    Good method for dependent outliers (in :math:`y`). Not robust against independent outliers (leverage points)

    The solution is found by iteratively reweighted least squares. Weights are calculated from the standardised residuals and multiplied by a fixed set of huber weights calculated from a normalised leverage measure of A. Iteration stops when the relative change in the residuals falls below eps(), when the residuals are all zero or after maxiter iterations.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    **kwargs
        Overrides for the values returned by defaultOptions(). The options read here are:

        initial : Dict
            Initial model, passed to initialModel to give parameters, residuals and scale. When not provided, an ordinary least squares solution is used.
        scale : optional
            A scale estimate which replaces the starting scale
        intercept : bool, optional
            True or False for adding an intercept term
        weights : str, optional
            The weights to use
        maxiter : int, optional
            Maximum number of reweighting iterations

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals, weights and scale
    """
    from resistics.common.math import eps
    from resistics.regression.moments import getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.data import RegressionData
    import numpy.linalg as linalg

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)

    # leverage measure: squared row norms of Q from the QR decomposition
    # computed for all rows at once rather than in a python loop over rows
    q, _ = linalg.qr(A)
    Pdiag = np.absolute(np.sum(q * np.conjugate(q), axis=1)).real.astype("float")
    Pdiag = Pdiag / np.max(Pdiag)
    leverageScale = getScale(Pdiag, "mad0")
    leverageWeights = getWeights(Pdiag / leverageScale, "huber")

    if options["intercept"]:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # see whether to do an initial OLS model or whether one is provided
    if options["initial"]:
        params, resids, scale = initialModel(options["initial"])
    else:
        # NOTE(review): params is not bound on this path until the loop below has
        # run once, so maxiter < 1 would fail at the final return - confirm
        # maxiter is always >= 1
        soln = olsModel(A, y)
        resids = soln.resids
        scale = getScale(resids, "mad0")

    # if an initial model was not provided but an initial scale was, replace the one here
    if options["scale"]:
        scale = options["scale"]

    # standardised residuals and weights
    weights = getWeights(resids / scale, options["weights"]) * leverageWeights
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew,
                                                                   ynew,
                                                                   rcond=None)
        residsNew = y - np.dot(A, paramsNew)

        # an exact fit leaves nothing to reweight, return with the scale and
        # weights that were used for this solve
        if np.sum(np.absolute(residsNew)) < eps():
            return RegressionData(A,
                                  y,
                                  params=paramsNew,
                                  resids=residsNew,
                                  scale=scale,
                                  weights=weights)

        # standardise and calculate weights
        scale = getScale(residsNew, "mad0")
        weights = getWeights(residsNew / scale,
                             options["weights"]) * leverageWeights
        iteration = iteration + 1
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        # use the R method of checking change in residuals (can check change in params)
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        resids = residsNew
        if changeResids < eps():
            break

    return RegressionData(A,
                          y,
                          params=params,
                          resids=resids,
                          scale=scale,
                          weights=weights)
Beispiel #6
0
def mmestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> Dict[str, Any]:
    r"""Two stage M estimate

    The two stage M estimate uses an initial mestimate with huber weights to give a measure of scale. A second M estimate is then performed using the calculated measure of scale. The second stage M estimate uses bisquare weights.

    Solves for :math:`x` where,

    .. math::        
        y = Ax .

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    **kwargs
        Overrides for the values returned by defaultOptions(). The options read here are:

        initial : Dict
            Initial solution with parameters, scale and residuals. If the scale is missing it is estimated from the residuals. The dictionary passed in is not modified. An empty or None value is treated as not provided.
        intercept : bool, optional
            True or False for adding an intercept term

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals, weights and scale
    """
    from resistics.regression.moments import getScale

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    intercept = options["intercept"]

    # a falsy initial is treated as absent, matching the check in mestimateModel
    initial = kwargs.get("initial")
    if initial:
        # work on a shallow copy so the caller's dictionary is left untouched
        initial = dict(initial)
        if "scale" not in initial:
            initial["scale"] = getScale(initial["resids"], "mad0")
        soln1 = mestimateModel(A,
                               y,
                               weights="huber",
                               initial=initial,
                               intercept=intercept)
        # update the scale in the initial solution and perform another mestimate
        initial["scale"] = soln1.scale
        # now do another, but with a different weighting function
        soln2 = mestimateModel(A,
                               y,
                               weights="bisquare",
                               initial=initial,
                               intercept=intercept)
    else:
        # no initial solution, calculate one
        soln1 = mestimateModel(A, y, weights="huber", intercept=intercept)
        # now do another, but with a different weighting function
        soln2 = mestimateModel(A,
                               y,
                               weights="bisquare",
                               scale=soln1.scale,
                               intercept=intercept)

    return soln2