def _detect_outliers(self, xs, ys, outs, degree=2):
    """Flag points that deviate too far from a polynomial fit.

    An ``OLS`` fit of order ``degree`` is built from ``xs``/``ys`` with
    ``outs`` applied as the mask.  The fitted polynomial is then evaluated
    at every x, and any point whose absolute deviation exceeds the
    detection tolerance gets its entry in ``outs`` set to ``1``.

    The tolerance is ``2.5 * sqrt(ssr / (n - degree))`` where ``ssr`` is
    the sum of squared deviations and ``n`` is ``len(xs) - sum(outs)``.

    :param xs: x values; converted with ``array``.
    :param ys: y values; converted with ``array``.
    :param outs: per-point omit flags, used as the fit mask and updated
        in place (assumes a mutable sequence of 0/1 flags the same length
        as ``xs`` -- TODO confirm against callers).
    :param degree: polynomial order passed to ``OLS`` as ``fitdegree``.
    :return: the same ``outs`` object, with newly detected outliers set
        to ``1``.  Entries that were already set are never cleared.
    """
    xs = array(xs)
    ys = array(ys)

    # Fit only the points that are not already flagged in ``outs``.
    masked_xs = masked_array(xs, mask=outs)
    masked_ys = masked_array(ys, mask=outs)
    fit = OLS(masked_xs, masked_ys, fitdegree=degree)
    coeffs = fit.get_coefficients()

    # Number of points not flagged on entry.
    n = len(xs) - sum(outs)

    # Use the error of the fit as the basis for the detection tolerance.
    # NOTE(review): ``ssr`` is summed over *all* points, including those
    # already flagged in ``outs``, while ``n`` counts only unflagged
    # points -- confirm this is intended.
    devs = abs(ys - polyval(coeffs, xs))
    ssr = sum(devs ** 2)
    detection_tol = 2.5 * (ssr / (n - degree)) ** 0.5

    for i, dev in enumerate(devs):
        if dev > detection_tol:
            outs[i] = 1

    return outs
def _detect_outliers(self, xs, ys, outs, degree=2):
    """Flag points that deviate too far from a polynomial fit.

    An ``OLS`` fit of order ``degree`` is built from ``xs``/``ys`` with
    ``outs`` applied as the mask.  The fitted polynomial is then evaluated
    at every x, and any point whose absolute deviation exceeds the
    detection tolerance gets its entry in ``outs`` set to ``1``.

    The tolerance is ``2.5 * sqrt(ssr / (n - degree))`` where ``ssr`` is
    the sum of squared deviations and ``n`` is ``len(xs) - sum(outs)``.

    :param xs: x values; converted with ``array``.
    :param ys: y values; converted with ``array``.
    :param outs: per-point omit flags, used as the fit mask and updated
        in place (assumes a mutable sequence of 0/1 flags the same length
        as ``xs`` -- TODO confirm against callers).
    :param degree: polynomial order passed to ``OLS`` as ``fitdegree``.
    :return: the same ``outs`` object, with newly detected outliers set
        to ``1``.  Entries that were already set are never cleared.
    """
    xs = array(xs)
    ys = array(ys)

    # Fit only the points that are not already flagged in ``outs``.
    masked_xs = masked_array(xs, mask=outs)
    masked_ys = masked_array(ys, mask=outs)
    fit = OLS(masked_xs, masked_ys, fitdegree=degree)
    coeffs = fit.get_coefficients()

    # Number of points not flagged on entry.
    n = len(xs) - sum(outs)

    # Use the error of the fit as the basis for the detection tolerance.
    # NOTE(review): ``ssr`` is summed over *all* points, including those
    # already flagged in ``outs``, while ``n`` counts only unflagged
    # points -- confirm this is intended.
    devs = abs(ys - polyval(coeffs, xs))
    ssr = sum(devs ** 2)
    detection_tol = 2.5 * (ssr / (n - degree)) ** 0.5

    for i, dev in enumerate(devs):
        if dev > detection_tol:
            outs[i] = 1

    return outs
def regress(d, degree=2):
    """Fit ``d`` with ``OLS`` and return one coefficient with its error.

    :param d: iterable unpacked as the positional arguments of ``OLS``
        (presumably the x and y data -- verify against callers).
    :param degree: polynomial order passed to ``OLS`` as ``fitdegree``.
    :return: two-item list ``[coefficient, standard_error]`` taken from
        index 2 of the fit's coefficients and coefficient standard errors.
    """
    fit = OLS(*d, fitdegree=degree)

    # NOTE(review): index 2 is hard-coded; it presumably selects the last
    # term for the default degree of 2 -- confirm behaviour for other degrees.
    value = fit.get_coefficients()[2]
    error = fit.get_coefficient_standard_errors()[2]
    return [value, error]