def __call__(self, a, mu=None, initscale=None, axis=0):
    """
    Compute Huber's proposal 2 estimate of scale, using an optional
    initial value of scale and an optional estimate of mu.

    If mu is supplied, it is not reestimated.

    Parameters
    ----------
    a : array
        1d array
    mu : float or None, optional
        If the location mu is supplied then it is not reestimated.
        Default is None, which means that it is estimated.
    initscale : float or None, optional
        A first guess on scale.  If initscale is None then the
        standardized median absolute deviation of a is used.
    axis : int, optional
        Axis along which to estimate.  Default is 0.

    Returns
    -------
    mu, scale : ndarray
        The joint estimates of location and scale.

    Notes
    -----
    `Huber` minimizes the function

        sum(psi((a[i]-mu)/scale)**2)

    as a function of (mu, scale), where

        psi(x) = np.clip(x, -self.c, self.c)
    """
    a = np.asarray(a)
    if mu is None:
        n = a.shape[0] - 1      # one degree of freedom used by the location
        mu = np.median(a, axis=axis)
        est_mu = True
    else:
        n = a.shape[0]
        est_mu = False

    if initscale is None:
        scale = stand_mad(a, axis=axis)
    else:
        scale = initscale
    scale = tools.unsqueeze(scale, axis, a.shape)
    mu = tools.unsqueeze(mu, axis, a.shape)
    return self._estimate_both(a, scale, mu, axis, est_mu, n)
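# Update equations used by _estimate_both below when self.norm is None:
#
#     location:  nmu    = mean(clip(a, mu - c*scale, mu + c*scale))
#     scale:     nscale = sqrt( sum_{|r_i| <= c} (a_i - nmu)**2
#                               / (n*gamma - n_clipped*c**2) )
#
# with r_i = (a_i - mu)/scale and n_clipped the number of observations with
# |r_i| > c.  The attribute self.gamma is set up elsewhere in the class; in
# this reading it plays the role of E[psi_c(Z)**2] for a standard normal Z,
# which makes the scale estimate consistent for sigma under normality.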
def _estimate_both(self, a, scale, mu, axis, est_mu, n):
    """
    Estimate scale and location simultaneously with the following
    pseudo-loop:

        while not_converged:
            mu, scale = estimate_location(a, scale, mu), estimate_scale(a, scale, mu)

    where estimate_location is an M-estimator and estimate_scale implements
    the check used in Section 5.5 of Venables & Ripley.
    """
    for _ in range(self.maxiter):
        # Estimate the location along the given axis.
        if est_mu:
            if self.norm is None:
                # One-step fixed-point update: the winsorized mean.  This is
                # what norms.HuberT would give, but computing it directly is
                # faster than going through norms.estimate_location.
                nmu = np.clip(a, mu - self.c * scale,
                              mu + self.c * scale).sum(axis) / a.shape[axis]
            else:
                nmu = norms.estimate_location(a, scale, self.norm, axis, mu,
                                              self.maxiter, self.tol)
        else:
            # The location was supplied by the caller; keep it fixed.
            nmu = mu.squeeze()
        nmu = tools.unsqueeze(nmu, axis, a.shape)

        # Scale update: winsorized sum of squares over the non-clipped
        # observations.
        subset = np.less_equal(np.fabs((a - mu) / scale), self.c)
        card = subset.sum(axis)
        nscale = np.sqrt(np.sum(subset * (a - nmu)**2, axis)
                         / (n * self.gamma
                            - (a.shape[axis] - card) * self.c**2))
        nscale = tools.unsqueeze(nscale, axis, a.shape)

        test1 = np.all(np.less_equal(np.fabs(scale - nscale),
                                     nscale * self.tol))
        test2 = np.all(np.less_equal(np.fabs(mu - nmu), nscale * self.tol))
        if test1 and test2:
            return nmu.squeeze(), nscale.squeeze()
        mu, scale = nmu, nscale

    raise ValueError('joint estimation of location and scale failed '
                     'to converge in %d iterations' % self.maxiter)
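# A minimal, self-contained 1-D sketch (not part of the original module) of
# the joint iteration that _estimate_both implements above.  The name
# `huber_proposal2_1d`, the default c=1.5, and the explicit computation of
# the consistency constant `gamma` are choices made here for illustration;
# the class precomputes the corresponding attributes elsewhere.  The imports
# duplicate what the module itself needs at the top.
import numpy as np
from scipy.stats import norm as Gaussian


def huber_proposal2_1d(a, c=1.5, tol=1e-8, maxiter=100):
    """Jointly estimate (location, scale) of a 1-D array, proposal 2 style."""
    a = np.asarray(a, dtype=float)
    n = a.shape[0] - 1                    # one d.o.f. used by the location
    mu = np.median(a)
    scale = np.median(np.abs(a - mu)) / Gaussian.ppf(0.75)
    # gamma = E[psi_c(Z)**2] for Z ~ N(0, 1), with psi_c(x) = clip(x, -c, c)
    gamma = (2 * Gaussian.cdf(c) - 1
             + 2 * c**2 * (1 - Gaussian.cdf(c))
             - 2 * c * Gaussian.pdf(c))
    for _ in range(maxiter):
        # Location update: winsorized mean.
        nmu = np.clip(a, mu - c * scale, mu + c * scale).mean()
        # Scale update: winsorized sum of squares over non-clipped points.
        keep = np.abs((a - mu) / scale) <= c
        nscale = np.sqrt(np.sum(keep * (a - nmu)**2)
                         / (n * gamma - (a.shape[0] - keep.sum()) * c**2))
        if (abs(scale - nscale) <= nscale * tol
                and abs(mu - nmu) <= nscale * tol):
            return nmu, nscale
        mu, scale = nmu, nscale
    raise ValueError("failed to converge in %d iterations" % maxiter)


# Example (a normal sample with a few gross outliers):
#
#     rng = np.random.default_rng(0)
#     x = np.concatenate([rng.standard_normal(500), [8.0, 9.0, -10.0]])
#     huber_proposal2_1d(x)    # location near 0, scale near 1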
def stand_mad(a, c=Gaussian.ppf(3/4.), axis=0):
    """
    The standardized median absolute deviation along the given axis of an
    array.

    Parameters
    ----------
    a : array-like
        Input array.
    c : float, optional
        The normalization constant.  Defined as scipy.stats.norm.ppf(3/4.),
        which is approximately 0.6745.
    axis : int, optional
        The default is 0.

    Returns
    -------
    mad : float or ndarray
        ``mad = median(abs(a - median(a)))/c``
    """
    a = np.asarray(a)
    d = np.median(a, axis=axis)
    d = tools.unsqueeze(d, axis, a.shape)
    return np.median(np.fabs(a - d) / c, axis=axis)
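# Small usage sketch (not part of the original module).  stand_mad divides
# the raw median absolute deviation by c = Gaussian.ppf(3/4.) ~ 0.6745, so
# that it estimates the standard deviation for normally distributed data
# while being barely affected by gross outliers, unlike the sample standard
# deviation.
if __name__ == "__main__":
    rng = np.random.default_rng(12345)
    clean = rng.normal(loc=0.0, scale=2.0, size=1000)
    contaminated = np.concatenate([clean, [50.0, -60.0, 75.0]])
    print(stand_mad(clean))          # close to 2.0
    print(stand_mad(contaminated))   # still close to 2.0
    print(contaminated.std(ddof=1))  # inflated by the outliers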