def line_fit_wls(x, y, u_y, label=None):
    """Return a weighted least-squares straight-line fit

    .. versionadded:: 1.2

    :arg x: sequence of stimulus data (independent-variable)
    :arg y: sequence of response data (dependent-variable)
    :arg u_y: sequence of uncertainties in the response data
    :arg label: suffix to label the uncertain numbers `a` and `b`
    :returns: an object containing regression results
    :rtype: :class:`.LineFitWLS`
    :raises RuntimeError: if there are fewer than three points, or if
        ``x``, ``y`` and ``u_y`` do not all have the same length

    Only the values of ``x`` and ``y`` are used (both are passed through
    ``value_seq``); ``u_y`` is handed to the fitting routine unchanged.
    The intercept ``a`` and slope ``b`` are created with infinite
    degrees of freedom, and the correlation between them is set before
    the result object is built.

    **Example**::

        >>> x = [1,2,3,4,5,6]
        >>> y = [3.2, 4.3, 7.6, 8.6, 11.7, 12.8]
        >>> u_y = [0.5,0.5,0.5,1.0,1.0,1.0]
        >>> fit = type_a.line_fit_wls(x,y,u_y)
        >>> fit.a_b
        InterceptSlope(a=ureal(0.8852320675105...,0.5297081435088...,inf), b=ureal(2.056962025316...,0.177892016741...,inf))

    """
    n_x, n_y, n_u = len(x), len(y), len(u_y)
    if n_x - 2 <= 0 or n_x != n_y or n_x != n_u:
        # Report the lengths, not the data: the sequences may be long.
        raise RuntimeError(
            "Invalid sequences: len(x)={}, len(y)={}, len(u_y)={}".format(
                n_x, n_y, n_u))

    x = value_seq(x)
    y = value_seq(y)

    a_, b_, siga, sigb, r_ab, ssr, N = _line_fit_wls(x, y, u_y)

    # Labels are only generated when the caller supplied a suffix.
    if label is None:
        label_a = label_b = None
    else:
        label_a = 'a_{}'.format(label)
        label_b = 'b_{}'.format(label)

    a = ureal(a_, siga, inf, label=label_a, independent=False)
    b = ureal(b_, sigb, inf, label=label_b, independent=False)
    a.set_correlation(r_ab, b)

    return LineFitWLS(a, b, ssr, N)
def estimate(seq, label=None):
    """Return an uncertain number for the mean of the data in ``seq``

    :arg seq: a sequence of data
    :arg str label: a label for the returned uncertain number
    :rtype: :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`
    :raises RuntimeError: if ``seq`` has fewer than two elements

    The elements of ``seq`` may be real numbers, complex numbers, or
    uncertain real or complex numbers. Note that only the value of
    uncertain numbers will be used.

    The function returns an :class:`~lib.UncertainReal` when the mean
    of the data is real, and an :class:`~lib.UncertainComplex` when the
    mean of the data is complex.

    In a type-A evaluation, the sample mean provides an estimate of the
    quantity of interest. The uncertainty in this estimate is the
    standard deviation of the sample mean (or the sample covariance of
    the mean, in the complex case).

    The degrees of freedom of the result is ``len(seq) - 1``.

    **Examples**::

        >>> data = range(15)
        >>> type_a.estimate(data)
        ureal(7.0,1.1547005383792515,14)

        >>> data = [(0.91518731126816899+1.5213442955575518j),
        ... (0.96572684493613492-0.18547192979059401j),
        ... (0.23216598132006649+1.6951311687588568j),
        ... (2.1642786101267397+2.2024333895672563j),
        ... (1.1812532664590505+0.59062101107787357j),
        ... (1.2259264339405165+1.1499373179910186j),
        ... (-0.99422341300318684+1.7359338393131392j),
        ... (1.2122867690240853+0.32535154897909946j),
        ... (2.0122536479379196-0.23283009302603963j),
        ... (1.6770229536619197+0.77195994890476838j)]
        >>> type_a.estimate(data)
        ucomplex((1.059187840567141+0.9574410497332932j), u=[0.28881665310241805,0.2655555630050262], r=-0.3137404512459525, df=9)

    """
    df = len(seq) - 1
    if df <= 0:
        raise RuntimeError(
            "require: len(seq) >= 2, got len(seq) = {}".format(len(seq)))

    seq = value_seq(seq)
    mu = mean(seq)

    if isinstance(mu, complex):
        u, r = standard_uncertainty(seq, mu)
        # ``independent`` is passed as True only when the correlation
        # coefficient between the components is exactly zero.
        return ucomplex(mu, u[0], u[1], r, df, label, independent=(r == 0.0))

    u = standard_uncertainty(seq, mu)
    return ureal(mu, u, df, label, independent=True)
def estimate_digitized(seq, delta, label=None, truncate=False):
    """
    Return an uncertain number for the mean of digitized data in ``seq``

    :arg seq: data
    :type seq: float, :class:`~lib.UncertainReal` or :class:`~lib.UncertainComplex`
    :arg float delta: digitization step size
    :arg str label: label for uncertain number returned
    :arg bool truncate: if ``True``, truncation, rather than rounding, is assumed
    :rtype: :class:`~lib.UncertainReal`
    :raises RuntimeError: if ``seq`` has fewer than two elements

    A sequence of data that has been formatted with fixed precision
    can completely conceal a small amount of variability in the original
    values, or merely obscure that variability.

    This function recognises the possible interaction between truncation, or
    rounding, errors and random errors in the underlying data. The function
    evaluates the mean of the data and evaluates the uncertainty in this mean.

    Set the argument ``truncate`` to ``True`` if data have been truncated,
    instead of rounded. In that case ``delta/2`` is added to the mean.

    The result is always built with ``ureal`` and has ``len(seq) - 1``
    degrees of freedom. NOTE(review): the data are ordered with
    ``max``/``min``, so complex-valued data are presumably not
    supported -- confirm.

    See reference: R Willink, *Metrologia*, **44** (2007) 73-81

    **Examples**::

        # LSD = 0.0001, data varies between -0.0055 and -0.0057
        >>> seq = (-0.0056,-0.0055,-0.0056,-0.0056,-0.0056,
        ...      -0.0057,-0.0057,-0.0056,-0.0056,-0.0057,-0.0057)
        >>> type_a.estimate_digitized(seq,0.0001)
        ureal(-0.005627272727272...,1.9497827808661...e-05,10)

        # LSD = 0.0001, data varies between -0.0056 and -0.0057
        >>> seq = (-0.0056,-0.0056,-0.0056,-0.0056,-0.0056,
        ... -0.0057,-0.0057,-0.0056,-0.0056,-0.0057,-0.0057)
        >>> type_a.estimate_digitized(seq,0.0001)
        ureal(-0.005636363636363...,1.52120004824377...e-05,10)

        # LSD = 0.0001, no spread in data values
        >>> seq = (-0.0056,-0.0056,-0.0056,-0.0056,-0.0056,
        ... -0.0056,-0.0056,-0.0056,-0.0056,-0.0056,-0.0056)
        >>> type_a.estimate_digitized(seq,0.0001)
        ureal(-0.0056,2.8867513459481...e-05,10)

        # LSD = 0.0001, no spread in data values, fewer points
        >>> seq = (-0.0056,-0.0056,-0.0056)
        >>> type_a.estimate_digitized(seq,0.0001)
        ureal(-0.0056,3.2914029430219...e-05,2)

    """
    N = len(seq)
    if N < 2:
        raise RuntimeError(
            "digitized data sequence must have more than one element")

    seq = value_seq(seq)

    x_max = max(seq)
    x_min = min(seq)
    mu = mean(seq)

    if x_max == x_min:
        # No scatter in the data: the uncertainty is a multiple of the
        # step size, with a coefficient that depends on the sample size
        # (N >= 2 is guaranteed by the check above).
        # NOTE(review): the 6.4 and 1.3 coefficients presumably come from
        # the reference cited in the docstring -- confirm.
        if N == 2:
            c = 6.4
        elif N == 3:
            c = 1.3
        else:
            c = 1.0
        u = math.sqrt(c / 12.0) * delta
    else:
        # Sample variance, accumulated left-to-right from 0.0.
        sum_sq = 0.0
        for x_i in seq:
            sum_sq = sum_sq + (x_i - mu)**2
        var = sum_sq / (N - 1)

        # Rounding error in ``x_max - x_min - delta`` grows with the
        # magnitude of the operands, so the tolerance is scaled to match;
        # the floor of 1.0 keeps it at least as wide as a fixed 10*epsilon.
        scale = max(1.0, abs(x_max), abs(x_min), abs(delta))
        tol = 10 * sys.float_info.epsilon * scale

        if abs(x_max - x_min - delta) < tol:
            # Scatter is LSD only
            x_mid = (x_max + x_min) / 2.0
            u = math.sqrt(max(var / N, (x_mid - mu)**2 / 3.0))
        else:
            u = math.sqrt(var / N)

    if truncate:
        mu += delta / 2.0

    return ureal(mu, u, N - 1, label, independent=True)
def line_fit_rwls(x, y, s_y, label=None):
    """Return a relative weighted least-squares straight-line fit

    .. versionadded:: 1.2

    The ``s_y`` values are used to scale variability in the ``y`` data.
    It is assumed that the standard deviation of each ``y`` value is
    proportional to the corresponding ``s_y`` scale factor.
    The unknown common factor in the uncertainties is estimated from the residuals.

    :arg x: sequence of stimulus data (independent-variable)
    :arg y: sequence of response data (dependent-variable)
    :arg s_y: sequence of scale factors
    :arg label: suffix to label the uncertain numbers `a` and `b`
    :returns: an object containing regression results
    :rtype: :class:`.LineFitRWLS`
    :raises RuntimeError: if there are fewer than three points, or if
        ``x``, ``y`` and ``s_y`` do not all have the same length

    The intercept ``a`` and slope ``b`` have ``len(x) - 2`` degrees of
    freedom.

    **Example**::

        >>> x = [1,2,3,4,5,6]
        >>> y = [3.014,5.225,7.004,9.061,11.201,12.762]
        >>> s_y = [0.2,0.2,0.2,0.4,0.4,0.4]
        >>> fit = type_a.line_fit_rwls(x,y,s_y)
        >>> a, b = fit.a_b
        >>>
        >>> print(fit)
        <BLANKLINE>
        Relative Weighted Least-Squares Results:
        <BLANKLINE>
          Intercept: 1.14(12)
          Slope: 1.973(41)
          Correlation: -0.87
          Sum of the squared residuals: 1.3395217958...
          Number of points: 6
        <BLANKLINE>

    """
    n_x, n_y, n_s = len(x), len(y), len(s_y)
    df = n_x - 2
    if df <= 0 or n_x != n_y or n_x != n_s:
        # Report the lengths, not the data: the sequences may be long.
        raise RuntimeError(
            "Invalid sequences: len(x)={}, len(y)={}, len(s_y)={}".format(
                n_x, n_y, n_s))

    x = value_seq(x)
    y = value_seq(y)

    a_, b_, siga, sigb, r_ab, ssr, N = _line_fit_wls(x, y, s_y)

    # The weighted fit treats ``s_y`` as the uncertainties; rescale the
    # parameter uncertainties by the common factor estimated from the
    # residuals.
    sigma_hat = math.sqrt(ssr / df)
    siga *= sigma_hat
    sigb *= sigma_hat

    # Labels are only generated when the caller supplied a suffix.
    if label is None:
        label_a = label_b = None
    else:
        label_a = 'a_{}'.format(label)
        label_b = 'b_{}'.format(label)

    a = ureal(a_, siga, df, label=label_a, independent=False)
    b = ureal(b_, sigb, df, label=label_b, independent=False)

    # NOTE(review): real_ensemble presumably registers a and b as a group
    # sharing ``df`` degrees of freedom -- confirm against its definition.
    real_ensemble((a, b), df)
    a.set_correlation(r_ab, b)

    return LineFitRWLS(a, b, ssr, N)
def line_fit(x, y, label=None):
    """Return a least-squares straight-line fit to the data

    .. versionadded:: 1.2

    :arg x: sequence of stimulus data (independent-variable)
    :arg y: sequence of response data (dependent-variable)
    :arg label: suffix to label the uncertain numbers `a` and `b`
    :returns: an object containing regression results
    :rtype: :class:`.LineFitOLS`
    :raises RuntimeError: if there are fewer than three points, or if
        ``x`` and ``y`` differ in length

    Performs an ordinary least-squares regression of ``y`` to ``x``.
    The intercept ``a`` and slope ``b`` have ``len(x) - 2`` degrees of
    freedom.

    **Example**::

        >>> x = [1,2,3,4,5,6,7,8,9]
        >>> y = [15.6,17.5,36.6,43.8,58.2,61.6,64.2,70.4,98.8]
        >>> result = type_a.line_fit(x,y)
        >>> a,b = result.a_b
        >>> a
        ureal(4.8138888888888...,4.8862063121833...,7)
        >>> b
        ureal(9.4083333333333...,0.8683016476563...,7)

        >>> y_p = a + b*5.5
        >>> dof(y_p)
        7.0

    """
    N = len(x)
    n_y = len(y)
    df = N - 2
    if df <= 0 or N != n_y:
        # Report the lengths, not the data: the sequences may be long.
        raise RuntimeError(
            "Invalid sequences: len(x)={}, len(y)={}".format(N, n_y))

    x = value_seq(x)
    y = value_seq(y)

    S_x = math.fsum(x)
    S_y = math.fsum(y)

    # Work with x centred on its mean.
    k = S_x / N
    t = [(x_i - k) for x_i in x]
    S_tt = math.fsum(t_i * t_i for t_i in t)

    b_ = math.fsum(t_i * y_i / S_tt for t_i, y_i in izip(t, y))
    a_ = (S_y - b_ * S_x) / N

    # Parameter uncertainties for unit data uncertainty; the correlation
    # coefficient is evaluated here, before siga and sigb are rescaled.
    siga = math.sqrt((1.0 + S_x * S_x / (N * S_tt)) / N)
    sigb = math.sqrt(1.0 / S_tt)
    r_ab = -S_x / (N * S_tt * siga * sigb)

    # Sum of squared residuals needed to correctly calculate parameter
    # uncertainties
    ssr = math.fsum(
        (y_i - a_ - b_ * x_i)**2 for x_i, y_i in izip(x, y))

    data_u = math.sqrt(ssr / df)
    siga *= data_u
    sigb *= data_u

    # Labels are only generated when the caller supplied a suffix.
    if label is None:
        label_a = label_b = None
    else:
        label_a = 'a_{}'.format(label)
        label_b = 'b_{}'.format(label)

    a = ureal(a_, siga, df=df, label=label_a, independent=False)
    b = ureal(b_, sigb, df=df, label=label_b, independent=False)

    # NOTE(review): real_ensemble presumably registers a and b as a group
    # sharing ``df`` degrees of freedom -- confirm against its definition.
    real_ensemble((a, b), df)
    a.set_correlation(r_ab, b)

    return LineFitOLS(a, b, ssr, N)
def variance_covariance_complex(seq, mu=None):
    """Return the sample variance-covariance matrix

    :arg seq: sequence of data
    :arg mu: the arithmetic mean of ``seq``
    :returns: a 4-element sequence
    :raises RuntimeError: if ``seq`` has fewer than two elements

    If ``mu`` is ``None`` the mean will be evaluated by :func:`~type_a.mean`.

    ``seq`` may contain numbers or uncertain numbers. Only the values of
    uncertain numbers are used in calculations.

    Variance-covariance matrix elements are returned in a
    :obj:`~named_tuples.VarianceCovariance` namedtuple; they can be
    accessed using the attributes ``.rr``, ``.ri``, ``.ir`` and ``.ii``.
    The ``.ri`` and ``.ir`` elements are the same value. Sums of squares
    and products are divided by ``len(seq) - 1``.

    **Example**::

        >>> data = [(0.91518731126816899+1.5213442955575518j),
        ... (0.96572684493613492-0.18547192979059401j),
        ... (0.23216598132006649+1.6951311687588568j),
        ... (2.1642786101267397+2.2024333895672563j),
        ... (1.1812532664590505+0.59062101107787357j),
        ... (1.2259264339405165+1.1499373179910186j),
        ... (-0.99422341300318684+1.7359338393131392j),
        ... (1.2122867690240853+0.32535154897909946j),
        ... (2.0122536479379196-0.23283009302603963j),
        ... (1.6770229536619197+0.77195994890476838j)]
        >>> type_a.variance_covariance_complex(data)
        VarianceCovariance(rr=0.8341505910928249, ri=-0.24062910264062262, ir=-0.24062910264062262, ii=0.7051975704291644)

        >>> v = type_a.variance_covariance_complex(data)
        >>> v[0]
        0.8341505910928249
        >>> v.rr
        0.8341505910928249
        >>> v.ii
        0.7051975704291644

    """
    df = len(seq) - 1
    if df <= 0:
        raise RuntimeError(
            "require: len(seq) >= 2, got len(seq) = {}".format(len(seq)))

    zseq = value_seq(seq)

    if mu is None:
        mu = mean(zseq)

    # Coerce so that ``.real``/``.imag`` are available for a real mean too.
    mu = complex(mu)
    mu_re = mu.real
    mu_im = mu.imag

    # One pass over the data; each sum is accumulated left-to-right from
    # 0.0, so the floating-point results match three separate folds.
    sum_rr = 0.0
    sum_ii = 0.0
    sum_ri = 0.0
    for z in zseq:
        sum_rr = sum_rr + (z.real - mu_re)**2
        sum_ii = sum_ii + (z.imag - mu_im)**2
        sum_ri = sum_ri + (z.imag - mu_im) * (z.real - mu_re)

    cv_11 = sum_rr / df
    cv_22 = sum_ii / df
    cv_12 = sum_ri / df

    return VarianceCovariance(cv_11, cv_12, cv_12, cv_22)