def test_leastSquares() -> None:
    """The weights for the least squares solution"""
    from resistics.regression.weights import leastSquares, getWeights
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = leastSquares(data)
    getweights = getWeights(data, "leastsquares")
    np.testing.assert_equal(weights, np.ones(data.size))
    np.testing.assert_equal(getweights, weights)
def test_andrewsWave() -> None:
    """The weights for Andrews wave"""
    from resistics.regression.weights import andrewsWave, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = andrewsWave(data)
    getweights = getWeights(data, "andrewsWave")
    awave = sm.robust.norms.AndrewWave()
    smweights = awave.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
def test_trimmedMean() -> None:
    """The weights for trimmed mean"""
    from resistics.regression.weights import trimmedMean, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = trimmedMean(data)
    getweights = getWeights(data, "trimmedMean")
    tmean = sm.robust.norms.TrimmedMean()
    smweights = tmean.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
def test_hampel() -> None:
    """The weights for Hampel"""
    from resistics.regression.weights import hampel, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    weights = hampel(data)
    getweights = getWeights(data, "hampel")
    hamp = sm.robust.norms.Hampel()
    smweights = hamp.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
def test_huber() -> None:
    """The weights for Huber"""
    from resistics.regression.weights import huber, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = huber(data)
    getweights = getWeights(data, "huber")
    hub = sm.robust.norms.HuberT()
    smweights = hub.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
def test_bisquare() -> None:
    """The weights for bisquare"""
    from resistics.regression.weights import bisquare, getWeights
    import statsmodels.api as sm
    import numpy as np

    data = np.array([1, 2, 3, 4, 5, 6])
    weights = bisquare(data)
    getweights = getWeights(data, "bisquare")
    bisq = sm.robust.norms.TukeyBiweight()
    smweights = bisq.weights(data)
    np.testing.assert_equal(weights, smweights)
    np.testing.assert_equal(getweights, smweights)
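# The statsmodels comparisons above hinge on each weight function implementing
# the standard robust M-estimate weights. As a quick illustration of what is
# being verified, here is a minimal sketch of the Huber weights,
# w(r) = min(1, t / |r|) with the conventional tuning constant t = 1.345 (the
# statsmodels HuberT default), checked against statsmodels directly. The
# hand-rolled huberWeights helper is hypothetical, written only for this
# comparison and not part of resistics.

import numpy as np
import statsmodels.api as sm


def huberWeights(r: np.ndarray, t: float = 1.345) -> np.ndarray:
    """Hypothetical helper: Huber weights w(r) = min(1, t / |r|)."""
    absr = np.absolute(r)
    # weight is 1 inside the threshold and t / |r| outside it
    return np.where(absr <= t, 1.0, t / absr)


data = np.array([1, 2, 3, 4, 5, 6], dtype=float)
smweights = sm.robust.norms.HuberT().weights(data)
np.testing.assert_allclose(huberWeights(data), smweights)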
def chatterjeeMachlerMod(A, y, **kwargs):
    # using the weights from chatterjeeMachler means the minimum residual value
    # ends up at median(resids). Instead, use M-estimate weights with a modified
    # residual that includes a measure of leverage: residuals / (1 - p)^2.
    # Pdiag is normalised below to stay strictly below 1, which avoids a divide
    # by zero in the leverage correction.
    import numpy as np
    import numpy.linalg as linalg
    from resistics.common.math import eps
    # parseKeywords is assumed to live in resistics.common.checks
    from resistics.common.checks import parseKeywords
    from resistics.regression.moments import getLocation, getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.robust import defaultOptions, applyWeights, olsModel

    # calculate p and n
    n = A.shape[0]
    p = A.shape[1]
    pnRatio = 1.0 * p / n
    # calculate the diagonal of the projection (hat) matrix via a QR decomposition
    q, r = linalg.qr(A)
    Pdiag = np.empty(shape=(n), dtype="float")
    for i in range(0, n):
        Pdiag[i] = np.absolute(np.sum(q[i, :] * np.conjugate(q[i, :]))).real
    del q, r
    Pdiag = Pdiag / (np.max(Pdiag) + 0.0000000001)
    locP = getLocation(Pdiag, "median")
    scaleP = getScale(Pdiag, "mad")
    # cap any leverage values beyond the bound
    bound = locP + 6 * scaleP
    indices = np.where(Pdiag > bound)
    Pdiag[indices] = 0.99999
    leverageMeas = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration, based purely on leverage
    tmp = np.ones(shape=(n), dtype="float") * pnRatio
    tmp = np.maximum(Pdiag, tmp)
    weights = np.reciprocal(tmp)

    # get options
    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    if options["intercept"]:
        # add a column of ones for the constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # iteratively reweighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # solve the weighted least squares problem
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, squareResidNew, rankNew, sNew = linalg.lstsq(Anew, ynew, rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew is not all zeros (this will happen for an
        # underdetermined or exactly determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")
        # standardise the residuals and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, "huber")
        # increment the iteration and save the new weights and parameters
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew
        if iteration > 1:
            # check whether the change in residuals is smaller than the tolerance
            changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
            if changeResids < eps():
                resids = residsNew
                break
        resids = residsNew

    # now repeat with a different weight function, starting from the
    # ordinary least squares solution
    params, resids, squareResid, rank, s = olsModel(A, y)
    resids = resids / leverageMeas
    resids = resids / scale
    weights = getWeights(resids, "trimmedMean")
    iteration = 0
    while iteration < options["maxiter"]:
        # solve the weighted least squares problem
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, squareResidNew, rankNew, sNew = linalg.lstsq(Anew, ynew, rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        # check residsNew is not all zeros (this will happen for an
        # underdetermined or exactly determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        scale = getScale(residsNew, "mad0")
        # standardise the residuals and calculate weights
        residsNew = residsNew / scale
        weightsNew = getWeights(residsNew, options["weights"])
        # increment the iteration and save the new weights and parameters
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew
        # check whether the change in residuals is smaller than the tolerance
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        if changeResids < eps():
            resids = residsNew
            break
        resids = residsNew
    # at the end, return the components
    return params, resids, weights
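# The leverage correction at the heart of chatterjeeMachlerMod is the diagonal
# of the hat (projection) matrix, computed row-wise from the Q factor of a QR
# decomposition. A minimal standalone sketch of that computation using only
# numpy; the design matrix here is illustrative and its last row is an obvious
# leverage point.

import numpy as np

A = np.array([[1.0, 1.0], [1.0, 2.0], [1.0, 3.0], [1.0, 50.0]])
q, _r = np.linalg.qr(A)
# hat matrix diagonal: h_i = sum_j |q_ij|^2, as in chatterjeeMachlerMod
Pdiag = np.sum(np.absolute(q) ** 2, axis=1)
print(Pdiag)  # the last entry is close to 1, flagging the leverage point
# the modified residual then divides by (1 - h_i)^2, down-weighting that row
leverageMeas = np.power(1.0 - Pdiag, 2)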
def mestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> "RegressionData":
    r"""M-estimate robust least squares

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    A good method for dependent outliers (in :math:`y`). Not robust against
    independent outliers (leverage points).

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : Dict, optional
        Initial model parameters and scale
    scale : optional
        A scale estimate
    intercept : bool, optional
        True or False for adding an intercept term
    weights : str, optional
        The weights to use

    Returns
    -------
    RegressionData
        RegressionData instance with the parameters, residuals, weights and scale
    """
    import numpy.linalg as linalg
    from resistics.common.math import eps
    # parseKeywords is assumed to live in resistics.common.checks; the helper
    # functions below are assumed to come from the surrounding robust module
    from resistics.common.checks import parseKeywords
    from resistics.regression.moments import getScale
    from resistics.regression.weights import getWeights
    from resistics.regression.data import RegressionData
    from resistics.regression.robust import (
        defaultOptions,
        applyWeights,
        olsModel,
        initialModel,
    )

    options = parseKeywords(defaultOptions(), kwargs, printkw=False)
    # calculate the leverage
    n = A.shape[0]
    p = A.shape[1]
    # calculate the diagonal of the projection (hat) matrix via a QR decomposition
    q, r = linalg.qr(A)
    Pdiag = np.empty(shape=(n), dtype="float")
    for ii in range(0, n):
        Pdiag[ii] = np.absolute(np.sum(q[ii, :] * np.conjugate(q[ii, :]))).real
    Pdiag = Pdiag / np.max(Pdiag)
    leverageScale = getScale(Pdiag, "mad0")
    leverageWeights = getWeights(Pdiag / leverageScale, "huber")

    if options["intercept"]:
        # add a column of ones for the constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # use the initial model if one is provided, otherwise do an initial OLS model
    if options["initial"]:
        params, resids, scale = initialModel(options["initial"])
    else:
        soln = olsModel(A, y)
        params = soln.params
        resids = soln.resids
        scale = getScale(resids, "mad0")
    # if an initial model was not provided but an initial scale was, use that scale
    if options["scale"]:
        scale = options["scale"]

    # standardised residuals and weights
    weights = getWeights(resids / scale, options["weights"]) * leverageWeights
    # iteratively reweighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # solve the weighted least squares problem
        Anew, ynew = applyWeights(A, y, weights)
        paramsNew, _squareResidNew, _rankNew, _sNew = linalg.lstsq(Anew, ynew, rcond=None)
        residsNew = y - np.dot(A, paramsNew)
        if np.sum(np.absolute(residsNew)) < eps():
            return RegressionData(
                A, y, params=paramsNew, resids=residsNew, scale=scale, weights=weights
            )
        # standardise and calculate weights
        scale = getScale(residsNew, "mad0")
        weightsNew = getWeights(residsNew / scale, options["weights"]) * leverageWeights
        # increment the iteration and save weightsNew
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew
        # check whether the change is smaller than the tolerance
        # use the R method of checking change in residuals (could instead check change in params)
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        if changeResids < eps():
            # update residuals
            resids = residsNew
            break
        # update residuals
        resids = residsNew
    return RegressionData(A, y, params=params, resids=resids, scale=scale, weights=weights)
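# A minimal usage sketch for mestimateModel, assuming the resistics imports
# above resolve (the module path in the import below is an assumption) and
# that RegressionData exposes params, resids and weights attributes as the
# function body itself suggests. The synthetic data and option values are
# illustrative only.

import numpy as np
from resistics.regression.robust import mestimateModel  # assumed module path

rng = np.random.default_rng(0)
nobs, nregressors = 100, 2
A = rng.standard_normal((nobs, nregressors))
x_true = np.array([2.0, -1.0])
y = A @ x_true + 0.1 * rng.standard_normal(nobs)
y[::10] += 5.0  # inject dependent outliers in y

soln = mestimateModel(A, y, weights="bisquare", intercept=False)
print(soln.params)  # should be close to x_true despite the outliers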