Example No. 1
def predband(x, xd, yd, p, func, conf=0.95):
    """
    This function estimates the prediction bands for the specified function
    https://codereview.stackexchange.com/questions/84414/obtaining-prediction-bands-for-regression-model
    """
    # x = requested points
    # xd = x data
    # yd = y data
    # p = parameters
    # func = function name
    alpha = 1.0 - conf  # significance
    N = len(xd)  # data sample size
    var_n = len(p)  # number of parameters
    # Quantile of Student's t distribution for p=(1-alpha/2)
    from scipy.stats.distributions import t
    q = t.ppf(1.0 - alpha / 2.0, N - var_n)
    # Stdev of an individual measurement
    se = np.sqrt(1. / (N - var_n) * \
                 np.sum((yd - func(xd, *p)) ** 2))
    # Auxiliary definitions
    sx = (x - xd.mean())**2
    sxd = np.sum((xd - xd.mean())**2)
    # Predicted values (best-fit model)
    yp = func(x, *p)
    # Prediction band
    dy = q * se * np.sqrt(1.0 + (1.0 / N) + (sx / sxd))
    # Upper & lower prediction bands.
    lpb, upb = yp - dy, yp + dy
    return lpb, upb
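
A minimal usage sketch (not part of the original snippet), assuming numpy and scipy.optimize.curve_fit are available; the decay model and data below are made up for illustration:

import numpy as np
from scipy.optimize import curve_fit

def decay(x, a, k):
    return a * np.exp(-k * x)

xd = np.linspace(0.0, 5.0, 30)
yd = decay(xd, 2.0, 1.3) + np.random.normal(0.0, 0.05, xd.size)
p_fit, _ = curve_fit(decay, xd, yd, p0=[1.0, 1.0])

xq = np.linspace(0.0, 5.0, 200)   # points where the band is evaluated
lpb, upb = predband(xq, xd, yd, p_fit, decay, conf=0.95)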
Example No. 2
def fit_curves(g, fn, arg_names, p0):
    vals = g.dropna()
    output = None

    try:
        x = np.tile(timepoints, len(vals))
        y = vals.values.flatten()

        popt, pcov = curve_fit(fn, x, y, p0=p0, bounds=bounds)
        # method of extracting parameter CI values taken from
        # http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/
        # other ref:
        # https://stackoverflow.com/a/60412600/383744

        alpha = 0.05  # 95% confidence interval = 100*(1-alpha)
        n = len(y)  # number of data points
        p = len(popt)  # number of parameters
        dof = max(0, n - p)  # number of degrees of freedom

        tval = t_distribution.ppf(1.0 - alpha / 2., dof)

        var = np.diag(pcov)
        sigma_k = np.sqrt(var[-1])
        ci_k = tval * sigma_k

        model_predictions = fn(x, *popt)
        abs_error = model_predictions - y

        r_squared = 1 - (np.var(abs_error) / np.var(y))
        output = [*popt, r_squared, ci_k]

    except (ValueError, RuntimeError) as e:
        output = [np.nan] * len(arg_names)

    return pd.Series(dict(zip(arg_names, output)))
Example No. 3
def nlinfit(model, x, y, p0, alpha=0.05):
    '''Nonlinear regression with confidence intervals.
    
    x is the independent data
    y is the dependent data
    model has a signature of f(x, p0, p1, p2, ...)
    p0 is the initial guess of the parameters

    returns [p, pint, SE]
      p is an array of the fitted parameters
      pint is an array of confidence intervals
      SE is an array of standard errors for the parameters.
    '''
    pars, pcov = curve_fit(model, x, y, p0=p0)
    n = len(y)    # number of data points
    p = len(pars) # number of parameters

    dof = max(0, n - p) # number of degrees of freedom

    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2., dof) 

    SE = []
    pint = []
    for i, p,var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        SE.append(sigma)
        pint.append([p - sigma * tval, p + sigma * tval])

    return (pars, np.array(pint), np.array(SE))
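
A minimal usage sketch (not part of the original snippet), assuming numpy, scipy.optimize.curve_fit and scipy.stats.distributions.t are imported as the snippet expects; the data are made up:

import numpy as np

def line(x, m, b):
    return m * x + b

x = np.linspace(0.0, 10.0, 25)
y = 2.0 * x + 1.0 + np.random.normal(0.0, 0.3, x.size)
p, pint, se = nlinfit(line, x, y, p0=[1.0, 0.0])
print(p)     # fitted slope and intercept
print(pint)  # 95% confidence interval for each parameter (alpha=0.05)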
Example No. 4
def conf95(stdev, Ndata, Npar):
    """Calculate (one half) of the symmetric 95% confidence interval.
    
    The symmetric 95% confidence interval is calculated from the standard
    deviation and number of degrees of freedom, using Student's t
    distribution.
    
    Parameters
    ----------
    stdev : float
        Standard deviation.
    Ndata : int
        Number of data points.
    Npar : int
        Number of parameters.
    
    Returns
    -------
    float
        The half-width of the 95% confidence interval, such that it
        can be reported in the traditional +/- manner.
    
    based on:
    http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/
    """
    alpha = 0.05  # 95% confidence interval = 100*(1-alpha)
    dof = max(0, Ndata - Npar)
    tval = student_t.ppf(1.0 - alpha / 2., dof)
    return (stdev * tval)
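
A hypothetical call to conf95 (not in the original source): the half-width of the 95% interval for a parameter whose standard deviation is 0.12, estimated from 25 data points and 3 fitted parameters, assuming student_t is scipy's Student t distribution as the snippet expects:

half_width = conf95(0.12, 25, 3)
print('parameter = value +/- {0:.3f}'.format(half_width))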
Example No. 5

def compute_coefficient_intervals( y_test, Theta ):
    alpha = 0.05    # 95% confidence interval = 100*(1-alpha)
    n  = len(y_test) # number of data points
    p  = len(Theta)  # number of parameters
    df = max(0, n - p) # number of degrees of freedom
    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2., df)
    # (the original snippet appears truncated here; returning the t multiplier keeps it usable)
    return tval
Example No. 6
def stats(n, p_opt, p_cov, p_labels, alpha=0.05):

    """
    For explanations on this, see:
    https://www.mathworks.com/help/curvefit/confidence-and-prediction-bounds.html
    http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/
    """

    # Compute p err -------------------------
    try:
        p_err = np.sqrt(np.diag(p_cov))
    except FloatingPointError:
        d = np.diag(p_cov)
        d.setflags(write=True)
        d[d < 0] = 0
        p_err = np.sqrt(d)

    # Compute t -----------------------------
    p = len(p_opt)  # number of parameters

    dof = max(0, n - p)  # number of degrees of freedom

    #  Inverse of Student's t cumulative distribution function
    tval = t.ppf(1.0 - alpha / 2., dof)

    # Compute ci ---------------------------
    r = {}
    for i, (pr, std) in enumerate(zip(p_opt, p_err)):

        ci = std * tval
        print(f'{PARAM_LABELS[i]}: {pr:.2f} [{pr - ci:.2f}  {pr + ci:.2f}]')
        r[f"{p_labels[i]}-CI"] = (pr - ci, pr + ci)

    return r
Example No. 7
def fitSingle(xdata, ydata, guess):
    try:
        popt, pcov = curve_fit(singleExp, xdata.T, ydata.T, p0=guess)


        print('a = ' + str(popt[0]))
        print('t1 = '+ str(popt[1]))
        print('offset = '+str(popt[2]))
 
        #np.savetxt(f+'_fit.csv', popt, delimiter = ',')
        #np.savetxt(f+'_fit_cov.csv', popt, delimiter = ',')
        
        # from http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/

        alpha = 0.05 # 95% confidence interval = 100*(1-alpha)
    
        n = len(ydata)    # number of data points
        p = len(popt) # number of parameters
        
        dof = max(0, n - p) # number of degrees of freedom
        
        # student-t value for the dof and confidence level
        tval = t.ppf(1.0-alpha/2., dof)
        params = []
        for i, p,var in zip(range(n), popt, np.diag(pcov)):
            sigma = var**0.5
            params.append('p{0}: {1} [{2}  {3}]'.format(i, p,p - sigma*tval,p + sigma*tval))
        #print(params)
        #with open(f+'_fit_param_confidence_int.txt', 'w') as newfile:
        #    newfile.write('\n'.join(params))
        return(popt, pcov, params)
    except RuntimeError:
        print("Error - curve_fit failed")
        return(0,0,0)
Example No. 8
def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
                    alpha=0.05, axis=None):
    """Returns the selected confidence interval of the trimmed mean along the
given axis.

Parameters
----------
    data : sequence
        Input data. The data is transformed to a masked array
    proportiontocut : float
        Proportion of the data to cut from each side of the data .
        As a result, (2*proportiontocut*n) values are actually trimmed.
    alpha : float
        Confidence level of the intervals.
    inclusive : tuple of boolean
        If relative==False, tuple indicating whether values exactly equal to the
        absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being masked
        on each side should be rounded (True) or truncated (False).
    axis : int
        Axis along which to cut. If None, uses a flattened version of the input.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data,limits=limits,inclusive=inclusive,axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1-alpha/2.,df)
    return np.array((tmean - tppf*tstde, tmean+tppf*tstde))
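
A minimal usage sketch (not part of the original snippet), assuming numpy (np), numpy.ma (ma), scipy.stats.mstats and scipy.stats.distributions.t are imported as the function expects; the sample is made up:

import numpy as np

sample = np.array([1.2, 1.5, 1.1, 9.0, 1.3, 1.4, 1.2, 1.6])
low, high = trimmed_mean_ci(sample, limits=(0.2, 0.2), alpha=0.05)
print(low, high)   # 95% confidence interval for the 20% trimmed mean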
Example No. 9
def trimmed_mean_ci(data, limits=(0.2, 0.2), inclusive=(True, True), alpha=0.05, axis=None):
    """Returns the selected confidence interval of the trimmed mean along the
given axis.

Parameters
----------
    data : sequence
        Input data. The data is transformed to a masked array
    proportiontocut : float
        Proportion of the data to cut from each side of the data .
        As a result, (2*proportiontocut*n) values are actually trimmed.
    alpha : float
        Confidence level of the intervals.
    inclusive : tuple of boolean
        If relative==False, tuple indicating whether values exactly equal to the
        absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being masked
        on each side should be rounded (True) or truncated (False).
    axis : int
        Axis along which to cut. If None, uses a flattened version of the input.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data, limits=limits, inclusive=inclusive, axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1 - alpha / 2.0, df)
    return np.array((tmean - tppf * tstde, tmean + tppf * tstde))
Example No. 10
    def predict(self, x_eval, alpha=0.05):
        """
        0.05 means 95 % confidence interval
        """
        self.fit(lower=self.lower, upper=self.upper)
        n = self.n
        p = self.p
        print r('fit')
        dof = n - p
        tval = t.ppf(1 - alpha / 2., dof)

        y_hat = np.empty(x_eval.shape)
        y_hat_se = np.empty(x_eval.shape)
        for i in xrange(len(x_eval)):
            li = self.eq.rsplit('x', 1)
            eq_x = ('%f' % x_eval[i]).join(li)
            #eq_x = self.eq.replace('x','%f'%x_eval[i])

            #foo = np.array(r('delta.method(fit, "%s")'%eq_x))
            foo = np.array(r('deltaMethod(fit, "%s")' % eq_x))
            y_hat[i] = foo[0]
            y_hat_se[i] = foo[1]
        y_ul = y_hat + tval * y_hat_se
        y_ll = y_hat - tval * y_hat_se

        return y_hat, y_ul, y_ll
Example No. 11
    def interval_confidence(self, x_tot, y_tot):
        def retta(x, p0, p1):
            return p0*x + p1

        par1, par2 = curve_fit(retta, x_tot, y_tot)
        y_fit2 = retta(x_tot, par1[0], par1[1])

        m = par1[0]
        q = par1[1]
        # residual = y_tot - y_fit2
        # ss_res = np.sum(residual**2)
        ss_tot = np.sum((y_tot - np.mean(y_tot))**2)
        if ss_tot == 0:
            ss_tot = 1
            # ss_res = 1
        # r2 = 1- (ss_res/ss_tot)
        # p = len(par1)
        # n = len(x_tot)
        alpha = 0.05 #95% confidence interval
        dof = max(0, len(x_tot) - len(par1)) #degree of freedom
        tval = tstud.ppf(1.0 - alpha/2., dof) #t-student value for the dof and confidence level
        sigma = np.diag(par2)**0.5
        m_err = sigma[0]*tval
        q_err = sigma[1]*tval

        y_fit2_up = retta(x_tot, m+(m_err/2), q+(q_err/2))
        y_fit2_down = retta(x_tot, m-(m_err/2), q-(q_err/2))

        return y_fit2, m, m_err, q, q_err, y_fit2_up, y_fit2_down
Example No. 12
def grubbs_test(xvec, sgnf, null='equal'):
    """
    The Grubbs' test for outlier detection

    Parameters
    ----------
    xvec : ndarray, shape (n,)
        Array of sample values.
    sgnf : float
        Level of significance for the test.
    null : str
        Options.

    Notes
    -----
    Specification of the null hypothesis can either be 'equal'
    (default), 'greater', or 'less'. Only 'equal' is implemented.

    Missings (i.e. NaN values) are ignored.
    """
    if np.nanvar(xvec) == 0:
        # degenerate case: every non-NaN value is identical, so the test statistic is undefined
        return None

    n = sum(~np.isnan(xvec))
    m = np.nanmean(xvec)
    s = np.sqrt(np.nanvar(xvec))
    candidates = abs(xvec - m)
    gstat = np.nanmax(candidates) / s
    tstat = t.ppf(q=1 - (sgnf / (2 * n)), df=n - 2)**2
    crval = ((n - 1) / np.sqrt(n)) * np.sqrt(tstat / (n - 2 + tstat))
    if (gstat > crval):
        return np.nanargmax(candidates)
    else:
        return None
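
A minimal usage sketch (not part of the original snippet), assuming numpy and scipy.stats.distributions.t are imported as the function expects; the data are made up, with one obvious outlier:

import numpy as np

data = np.array([9.8, 10.1, 10.0, 9.9, 15.2, 10.2, np.nan])
idx = grubbs_test(data, sgnf=0.05)
print(idx)   # index of the suspected outlier, or None if none is detected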
Example No. 13
def GetExpFit(dDistr, infLimit, supLimit, alpha=0.05):

    from scipy.stats.distributions import t
    from scipy.optimize import curve_fit

    dDistr_Log = np.zeros((supLimit - infLimit, 2))
    for k in range(supLimit - infLimit):
        dDistr_Log[k, 0] = k + infLimit
        dDistr_Log[k, 1] = dDistr[k + infLimit]

    def exponenial_func(x, a, b, c):
        return a * np.power(x, -b) + c

    popt, pcov = curve_fit(exponenial_func,
                           dDistr_Log[:, 0],
                           dDistr_Log[:, 1],
                           p0=(1.0, 1.0, 1.0))

    perr = np.sqrt(np.diag(pcov))

    n = supLimit - infLimit  # number of data points
    p = 3  # number of parameters
    dof = max(0, n - p)  # number of degrees of freedom

    tval = t.ppf(1.0 - alpha / 2., dof)

    Interval = perr[1]
    corrInterval = perr[1] * tval
    Exp = popt[1]

    return Exp, Interval, corrInterval
Example No. 14
def errors(estimates, covariance, dof, alpha=0.05):
    """Calculate approximate SD and confidence limits. Default: 95%"""
    tval = t.ppf(1.0 - alpha / 2., dof)  #student-t value
    sigma = np.sqrt(np.diag(covariance))
    confidence_limits = np.array(
        [estimates - sigma * tval, estimates + sigma * tval]).T
    return sigma, confidence_limits
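
A hypothetical call to errors (not in the original source), with made-up estimates and a diagonal covariance matrix; in practice these would come from curve_fit, and t from scipy.stats.distributions is assumed to be imported:

import numpy as np

estimates = np.array([2.0, 0.5])
covariance = np.diag([0.010, 0.0004])
sigma, limits = errors(estimates, covariance, dof=10, alpha=0.05)
print(sigma)   # approximate standard deviations
print(limits)  # one (lower, upper) pair per parameter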
Example No. 15

    def get_interval(self):
        self.degrees_of_freedom = self.trials - 1
        half_alpha = (1 - (self.confidence_interval * 10**(-2))) / 2
        t_val = tdist.ppf(1 - half_alpha, self.degrees_of_freedom)
        error_val = (sqrt((self.p_hat * (1 - self.p_hat)) / self.trials))

        interval = (self.p_hat - (t_val * error_val),
                    self.p_hat + (t_val * error_val))
        return interval
Example No. 16
    def NewState(self):
        global START_TIME
        power = self.autoscaler.PowerRequestsPerSecond() * 60
        power_per_node = (power / len(self.autoscaler.nodes))
        # how many requests I got in the last minute
        last_requests = sum(self.autoscaler.num_requests)
        if self.use_predictions:
            x = range(1, 13)
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                x, self.autoscaler.num_requests)

            x = np.arange(1, 13)
            y = np.array(self.autoscaler.num_requests)
            y_err = y - (intercept + slope * x)
            p_x = np.arange(13, 25)
            mean_x = np.mean(x)
            alpha = 0.05
            n = len(x)
            t_ = t.ppf(1 - alpha / 2, n - 2)
            s_err = np.sum(np.power(y_err, 2))
            confs = t_ * np.sqrt((s_err / (n - 2)) * (1.0 / n + (np.power(
                (p_x - mean_x), 2) / ((np.sum(np.power(x, 2))) - n *
                                      (np.power(mean_x, 2))))))
            p_y = slope * p_x + intercept
            lower = p_y - abs(confs)
            upper = p_y + abs(confs)

            prediction = sum(
                [max(0, intercept + slope * x) for x in range(13, 25)])
            print 'Last requests:', last_requests
            self.out.write('%s Last requests: %s prediction: %s upper: %s\n' %
                           (time.time() - START_TIME, last_requests,
                            prediction, sum(upper)))
            print 'Prediction: ', prediction, 'Upper:', sum(
                upper), 'Lower: ', sum(lower)
            # I'm being conservative here, and not decreasing the number of nodes
            # before the load goes down. I'm also using the upper bound of 95%
            # confidence interval.
            last_requests = max(sum(upper), last_requests)
            # last_requests = max(prediction, last_requests)

        print 'Current power:', power, 'requests to handle:', prediction
        min_cost = sys.maxint
        n_nodes = 1
        nodes = [x for x in self.autoscaler.possible_nodes if not x.IsDown()]
        for i in range(1, len(nodes) + 1):
            cost = self.ExpectedCost(i, power_per_node, last_requests)
            print i, 'nodes, P(SLA): ', self.ProbSLAViolation(
                i, power_per_node, last_requests),
            print 'cost:', cost
            if cost < min_cost:
                min_cost, n_nodes = cost, i
        nodes = nodes[:n_nodes]
        state = {x.name: str(x.AvgPredictionTime()) for x in nodes}
        self.autoscaler.nodes = nodes
        sys.stdout.flush()
        return state
Example No. 17
def fit_curve(f, X, Y, alpha=.05):
    x, y = map(np.asarray, [X, Y])
    pars, pcov = curve_fit(f, x, y)
    n = len(y)
    p = len(pars)
    dof = max(0, n - p)
    tval = t.ppf(1.0 - alpha / 2., dof)
    conf = [tval * elem**.5 for elem in np.diag(pcov)]
    return pars, conf
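
A minimal usage sketch (not part of the original snippet), assuming numpy, scipy.optimize.curve_fit and scipy.stats.distributions.t are imported as the function expects; the model and data are made up:

import numpy as np

def expmodel(x, a, k):
    return a * np.exp(-k * x)

X = np.linspace(0.0, 4.0, 20)
Y = expmodel(X, 3.0, 0.8) + np.random.normal(0.0, 0.05, X.size)
pars, conf = fit_curve(expmodel, X, Y, alpha=0.05)
for par, c in zip(pars, conf):
    print('{0:.3f} +/- {1:.3f}'.format(par, c))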
Example No. 18
def regress(A, y, alpha=None):
    '''Linear regression with confidence intervals.

    A is a matrix of function values in columns, e.g.
    A = np.column_stack([T**0, T**1, T**2, T**3, T**4])

    y is a vector of values you want to fit

    alpha is for the 100*(1 - alpha) confidence level

    returns: [b, bint, se]
      b is a vector of the fitted parameters
      bint is a 2D array of confidence intervals
      se is an array of standard error for each parameter.

    The confidence intervals account for sample size using a student T
    multiplier.

    This code is derived from the descriptions at
    http://www.weibull.com/DOEWeb/confidence_intervals_in_multiple_linear_regression.htm
    and
    http://www.weibull.com/DOEWeb/estimating_regression_models_using_least_squares.htm

    '''

    b, res, rank, s = np.linalg.lstsq(A, y)

    bint, se = None, None

    if alpha is not None:
        # compute the confidence intervals
        n = len(y)
        k = len(b)

        errors = y - np.dot(A, b)
        sigma2 = np.sum(errors**2) / (n - k)  # RMSE

        covariance = np.linalg.inv(np.dot(A.T, A))

        C = sigma2 * covariance
        dC = np.diag(C)

        if (dC < 0.0).any():
            warnings.warn('\n{0}\ndetected a negative number in your'
                          'covariance matrix. Taking the absolute value'
                          'of the diagonal. something is probably wrong'
                          'with your data or model'.format(dC))
            dC = np.abs(dC)

        se = np.sqrt(dC)  # standard error

        sT = t.ppf(1.0 - alpha/2.0, n - k - 1)  # student T multiplier
        CI = sT * se

        bint = np.array([(beta - ci, beta + ci) for beta, ci in zip(b, CI)])

    return (b, bint, se)
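
A minimal usage sketch (not part of the original snippet): fitting a quadratic with 95% intervals, assuming numpy, warnings and scipy.stats.distributions.t are imported as the function expects; the data are made up:

import numpy as np

T = np.linspace(0.0, 1.0, 15)
y = 1.0 + 2.0 * T + 3.0 * T**2 + np.random.normal(0.0, 0.05, T.size)
A = np.column_stack([T**0, T**1, T**2])
b, bint, se = regress(A, y, alpha=0.05)
print(b)     # fitted coefficients
print(bint)  # one (lower, upper) confidence pair per coefficient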
Example No. 19
def trimmed_mean_ci(data,
                    limits=(0.2, 0.2),
                    inclusive=(True, True),
                    alpha=0.05,
                    axis=None):
    """
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input area_data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked area_data, as floats between 0. and 1. If ``n``
        is the number of unmasked area_data before trimming, then
        (``n * limits[0]``)th smallest area_data and (``n * limits[1]``)th
        largest area_data are masked.  The total number of unmasked area_data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of area_data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `area_data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed area_data.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data,
                                limits=limits,
                                inclusive=inclusive,
                                axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1 - alpha / 2., df)
    return np.array((tmean - tppf * tstde, tmean + tppf * tstde))
Example No. 20
def do_curve_fit(func, x, y, alpha=0.05):
    x = np.array(x, dtype=np.int64)
    y = np.array(y, dtype=np.int64)
    coeffs, pcov = curve_fit(func, x, y)
    n = len(y)
    p = len(coeffs)
    dof = max(0, n - p) # number of degrees of freedom
    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2.0, dof) 
    cl = np.multiply(np.sqrt(np.diag(pcov)), tval)
    return coeffs, cl
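
A minimal usage sketch (not part of the original snippet). do_curve_fit casts its inputs to int64, so integer-valued data are used here; numpy, curve_fit and t are assumed imported as in the snippet:

import numpy as np

def linear(x, m, b):
    return m * x + b

xs = list(range(10))
ys = [3 * v + 5 + (v % 2) for v in xs]   # roughly linear integer data
coeffs, cl = do_curve_fit(linear, xs, ys)
print(coeffs)  # fitted slope and intercept
print(cl)      # 95% half-widths for each coefficient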
Example No. 21
 def sample(self, x, use_stddev=False):
     mu, nu, alpha, beta = self.B[x, :]
     scale = np.square(self.w) * max(0, beta * (nu + 1) / (alpha * nu)) # Make sure that the scale is >= 0
     df = 2 * alpha
     try:
         r = t.ppf(q=self.q, df=df, loc=mu, scale=scale)
     except:
         print(scale)
         exit()
     b = (r - mu)
     return b
Example No. 22
def pcov_t_cvft(ydata_to_fit,p_cvft,pcov_cvft,alpha):
    # diagonal elements of the covariance matrix gives sigma    
    delta_p_cvft=np.sqrt(np.diag(pcov_cvft))
    # degrees of freedom, should be positive or zero
    dof=len(ydata_to_fit)-len(p_cvft) 
    # get t value from t distribution
    from scipy.stats.distributions import t
    tval=t.ppf(1.0-alpha/2.0,dof)
    # calc t distribution corrected sigma
    delta_p_cvft=delta_p_cvft*tval*2
    return delta_p_cvft
Example No. 23
def confint(n: int,
            pars: np.ndarray,
            pcov: np.ndarray,
            confidence: float = 0.95,
            **kwargs):
    """
    This function returns the confidence interval for each parameter

    Parameters
    ----------
    n : int
        The number of data points
    pars : [double]
        The array with the fitted parameters
    pcov : [double]
        The covariance matrix
    confidence : float
        The confidence interval
    
    Returns
    -------
    np.ndarray
        ci: The matrix with the confidence intervals for the parameters
        
    Note:
        Adapted from 
        http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/
        Copyright (C) 2013 by John Kitchin.
        https://kite.com/python/examples/702/scipy-compute-a-confidence-interval-from-a-dataset
    
    """
    is_log = kwargs.get('is_log', False)
    from scipy.stats.distributions import t

    if is_log:
        pars = np.power(10, pars)
        pcov = np.power(10, pcov)

    p = len(pars)  # number of parameters
    dof = max(0, n - p)  # number of degrees of freedom

    # Quantile of Student's t distribution for p=(1 - alpha/2)
    # tval = t.ppf((1.0 + confidence)/2.0, dof)
    alpha = 1.0 - confidence
    tval = t.ppf(1.0 - alpha / 2.0, dof)

    ci = np.zeros((p, 2), dtype=np.float64)

    for i, p, var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        ci[i, :] = [p - sigma * tval, p + sigma * tval]

    return ci
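
A hypothetical call to confint (not in the original source), using made-up fit results; in practice pars and pcov come from curve_fit and n is the number of fitted data points:

import numpy as np

pars = np.array([2.0, 0.5])
pcov = np.diag([0.010, 0.0004])
ci = confint(20, pars, pcov, confidence=0.95)
print(ci)   # one [lower, upper] row per parameter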
Example No. 24
def compute_error(y,pars,pcov,alpha=0.05):
    # 95% confidence interval = 100*(1-alpha)

    n = len(y)    # number of data points
    p = len(pars) # number of parameters
    dof = max(0, n - p) # number of degrees of freedom
    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2., dof) 

    for i, p,var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        print(p - sigma*tval,p + sigma*tval)
Example No. 25
def regress(A, y, alpha=None):
    '''Linear regression with confidence intervals.

    A is a matrix of function values in columns, e.g.
    A = np.column_stack([T**0, T**1, T**2, T**3, T**4])

    y is a vector of values you want to fit

    alpha is for the 100*(1 - alpha) confidence level

    returns: [b, bint, se]
      b is a vector of the fitted parameters
      bint is a 2D array of confidence intervals
      se is an array of standard error for each parameter.

    The confidence intervals account for sample size using a student T multiplier.
    
    This code is derived from the descriptions at http://www.weibull.com/DOEWeb/confidence_intervals_in_multiple_linear_regression.htm and http://www.weibull.com/DOEWeb/estimating_regression_models_using_least_squares.htm
    '''
    
    b, res, rank, s = np.linalg.lstsq(A, y)

    bint, se = None, None

    if alpha is not None:
        # compute the confidence intervals
        n = len(y)
        k = len(b)

        errors =  y - np.dot(A, b)
        sigma2 = np.sum(errors**2) / (n - k)  # RMSE

        covariance =  np.linalg.inv(np.dot(A.T, A))
                
        C = sigma2 * covariance 
        dC = np.diag(C)
        
        if (dC < 0.0).any():
            warnings.warn('\n{0}\ndetected a negative number in your'
                          'covariance matrix. Taking the absolute value'
                          'of the diagonal. something is probably wrong'
                          'with your data or model'.format(dC))
            dC = np.abs(dC)
            
        se = np.sqrt(dC) # standard error

        sT = t.ppf(1.0 - alpha/2.0, n - k - 1) # student T multiplier
        CI = sT * se

        bint = np.array([(beta - ci, beta + ci) for beta,ci in zip(b,CI)])

    return (b, bint, se)
Example No. 26
  def NewState(self):
    global START_TIME
    power = self.autoscaler.PowerRequestsPerSecond() * 60
    power_per_node = (power / len(self.autoscaler.nodes))
    # how many requests I got in the last minute
    last_requests = sum(self.autoscaler.num_requests)
    if self.use_predictions:
      x = range(1,13)
      slope, intercept, r_value, p_value, std_err = stats.linregress(x, self.autoscaler.num_requests)
      
      x = np.arange(1,13)
      y = np.array(self.autoscaler.num_requests)
      y_err = y - (intercept + slope * x)
      p_x = np.arange(13, 25)
      mean_x = np.mean(x) 
      alpha = 0.05
      n = len(x)
      t_ = t.ppf(1 - alpha / 2, n - 2)
      s_err = np.sum(np.power(y_err,2))
      confs = t_ * np.sqrt((s_err/(n-2))*(1.0/n + (np.power((p_x-mean_x),2)/
                    ((np.sum(np.power(x,2)))-n*(np.power(mean_x,2))))))
      p_y = slope*p_x+intercept
      lower = p_y - abs(confs)
      upper = p_y + abs(confs)

      prediction = sum([max(0, intercept + slope * x) for x in range(13,25)])
      print 'Last requests:', last_requests
      self.out.write('%s Last requests: %s prediction: %s upper: %s\n' % (time.time() - START_TIME, last_requests, prediction, sum(upper)))
      print 'Prediction: ', prediction, 'Upper:', sum(upper), 'Lower: ', sum(lower)
      # I'm being conservative here, and not decreasing the number of nodes
      # before the load goes down. I'm also using the upper bound of 95%
      # confidence interval.
      last_requests = max(sum(upper), last_requests)
      # last_requests = max(prediction, last_requests)



    print 'Current power:', power, 'requests to handle:', prediction
    min_cost = sys.maxint
    n_nodes = 1
    nodes = [x for x in self.autoscaler.possible_nodes if not x.IsDown()]
    for i in range(1, len(nodes) + 1):
      cost = self.ExpectedCost(i, power_per_node, last_requests)
      print i, 'nodes, P(SLA): ', self.ProbSLAViolation(i, power_per_node, last_requests), 
      print 'cost:', cost
      if cost < min_cost:
        min_cost, n_nodes = cost, i
    nodes = nodes[:n_nodes]
    state = {x.name:str(x.AvgPredictionTime()) for x in nodes}
    self.autoscaler.nodes = nodes
    sys.stdout.flush()
    return state
Example No. 27
def predband(x: np.ndarray,
             xd: np.ndarray,
             yd: np.ndarray,
             p: np.ndarray,
             func: Callable[[np.ndarray, np.ndarray], np.ndarray],
             conf: float = 0.95):
    """
    This function estimates the prediction bands for the specified function without using the jacobian of the fit
    https://codereview.stackexchange.com/questions/84414/obtaining-prediction-bands-for-regression-model

    Parameters
    ----------
    x: np.ndarray
        The requested data points for the prediction bands
    xd: np.ndarray
        The experimental values for x
    yd: np.ndarray
        The experimental values for y
    p: np.ndarray
        The fitted parameters
    func: Callable[[np.ndarray, np.ndarray], np.ndarray]
        The optimized function
    conf: float
        The confidence level

    Returns
    -------
    np.ndarray:
        yp: The value of the function at the requested points (x)
    np.ndarray:
        lpb: The lower prediction band
    np.ndarray:
        upb: The upper prediction band
    """
    alpha = 1.0 - conf  # significance
    npoints = len(xd)  # data sample size
    var_n = len(p)  # number of parameters
    # Quantile of Student's t distribution for p=(1-alpha/2)
    from scipy.stats.distributions import t
    q = t.ppf(1.0 - alpha / 2.0, npoints - var_n)
    # Stdev of an individual measurement
    se = np.sqrt(1. / (npoints - var_n) * np.sum((yd - func(xd, *p))**2))
    # Auxiliary definitions
    sx = (x - xd.mean())**2
    sxd = np.sum((xd - xd.mean())**2)
    # Predicted values (best-fit model)
    yp = func(x, *p)
    # Prediction band
    dy = q * se * np.sqrt(1.0 + (1.0 / npoints) + (sx / sxd))
    # Upper & lower prediction bands.
    lpb, upb = yp - dy, yp + dy
    return yp, lpb, upb
Example No. 28
def nlinfit(model, x, y, p0, alpha=0.05):
    """Nonlinear regression with confidence intervals.

    Parameters
    ----------
    model : function f(x, p0, p1, ...) = y
    x : array of the independent data
    y : array of the dependent data
    p0 : array of the initial guess of the parameters
    alpha : 100*(1 - alpha) is the confidence interval
        i.e. alpha = 0.05 is 95% confidence

    Example
    -------
    Fit a line \(y = mx + b\) to some data.

    >>> import numpy as np
    >>> def f(x, m, b):
    ...    return m * x + b
    ...
    >>> X = np.array([0, 1, 2])
    >>> y = np.array([0, 2, 4])
    >>> nlinfit(f, X, y, [0, 1])
    (array([  2.00000000e+00,  -2.18062024e-12]), array([[  2.00000000e+00,   2.00000000e+00],
           [ -2.18315458e-12,  -2.17808591e-12]]), array([  1.21903752e-12,   1.99456367e-16]))

    Returns
    -------
    [p, pint, SE]
      p is an array of the fitted parameters
      pint is an array of confidence intervals
      SE is an array of standard errors for the parameters.

    """
    pars, pcov = curve_fit(model, x, y, p0=p0)
    n = len(y)  # number of data points
    p = len(pars)  # number of parameters

    dof = max(0, n - p)  # number of degrees of freedom

    # student-t value for the dof and confidence level
    tval = t.ppf(1.0 - alpha / 2., dof)

    SE = []
    pint = []
    for i, p, var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        SE.append(sigma)
        pint.append([p - sigma * tval, p + sigma * tval])

    return (pars, np.array(pint), np.array(SE))
Example No. 29
def nlinfit(model, x, y, p0, alpha=0.05):
    """Nonlinear regression with confidence intervals.

    Parameters
    ----------
    model : function f(x, p0, p1, ...) = y
    x : array of the independent data
    y : array of the dependent data
    p0 : array of the initial guess of the parameters
    alpha : 100*(1 - alpha) is the confidence interval
        i.e. alpha = 0.05 is 95% confidence

    Example
    -------
    Fit a line \(y = mx + b\) to some data.

    >>> import numpy as np
    >>> def f(x, m, b):
    ...    return m * x + b
    ...
    >>> X = np.array([0, 1, 2])
    >>> y = np.array([0, 2, 4])
    >>> nlinfit(f, X, y, [0, 1])
    (array([  2.00000000e+00,  -2.18062024e-12]), array([[  2.00000000e+00,   2.00000000e+00],
           [ -2.18315458e-12,  -2.17808591e-12]]), array([  1.21903752e-12,   1.99456367e-16]))

    Returns
    -------
    [p, pint, SE]
      p is an array of the fitted parameters
      pint is an array of confidence intervals
      SE is an array of standard errors for the parameters.

    """
    pars, pcov = curve_fit(model, x, y, p0=p0)
    n = len(y)    # number of data points
    p = len(pars)  # number of parameters

    dof = max(0, n - p)  # number of degrees of freedom

    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2., dof)

    SE = []
    pint = []
    for i, p, var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        SE.append(sigma)
        pint.append([p - sigma * tval, p + sigma * tval])

    return (pars, np.array(pint), np.array(SE))
Example No. 30
def fit_curve(f, X, Y, alpha=.05):
    x, y = list(map(np.asarray, [X, Y]))
    try:
        # print x, y
        pars, pcov = curve_fit(f, x, y)
    except:
        pars, pcov = curve_fit(f, x, y, method='dogbox')

    n = len(y)
    p = len(pars)
    dof = max(0, n - p)
    tval = t.ppf(1.0 - alpha / 2., dof)
    conf = [tval * elem**.5 for elem in np.diag(pcov)]
    return pars, conf
Example No. 31
def trimmed_mean_ci(data, limits=(0.2,0.2), inclusive=(True,True),
                    alpha=0.05, axis=None):
    """
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    """
    data = ma.array(data, copy=False)
    trimmed = mstats.trimr(data, limits=limits, inclusive=inclusive, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = mstats.trimmed_stde(data,limits=limits,inclusive=inclusive,axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1-alpha/2.,df)
    return np.array((tmean - tppf*tstde, tmean+tppf*tstde))
Example No. 32
def fit_model(x, y): 
    initial_guess = [1.2, 0.03]
    pars, pcov = curve_fit(func, x, y, p0=initial_guess)
    alpha = 0.05 # 95% confidence interval = 100*(1-alpha)

    n = len(y)    # number of data points
    p = len(pars) # number of parameters
    dof = max(0, n - p) # number of degrees of freedom
    # student-t value for the dof and confidence level
    tval = t.ppf(1.0-alpha/2., dof) 
    for i, p,var in zip(range(n), pars, np.diag(pcov)):
        sigma = var**0.5
        print('p{0}: {1} [{2}  {3}]'.format(i, p, p - sigma * tval, p + sigma * tval))

    return pars
Example No. 33
def pcov_t_lsq(xdata_to_fit, ydata_to_fit, p_lsq, cov_x, alpha):
    '''
    # calculate covariance of parameters    
    '''
    # calc residual variance/ reduced chi square
    res_var=np.sum((Lorentzian_cvft(xdata_to_fit,*p_lsq)-ydata_to_fit)**2)/(len(ydata_to_fit)-len(p_lsq))
    pcov_lsq=cov_x*res_var    
    delta_p_lsq=np.sqrt(np.diag(pcov_lsq))
    # degrees of freedom, should be positive or zero
    dof=len(ydata_to_fit)-len(p_lsq) 
    # get t value from t distribution
    from scipy.stats.distributions import t
    tval=t.ppf(1.0-alpha/2.0,dof)
    # calc t distribution corrected sigma
    delta_p_lsq=delta_p_lsq*tval*2
    return delta_p_lsq
Example No. 34
def confidenceInterval(mean, stdev, nSamples, percentConfidence, trueStd = True, printLatex = False):
    '''if trueStd, use normal distribution, otherwise, Student

    Use otherwise t.interval or norm.interval
    ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11))
    t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples))
    loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)'''
    from math import sqrt
    from scipy.stats.distributions import norm, t
    if trueStd:
        k = round(norm.ppf(0.5+percentConfidence/200., 0, 1)*100)/100. # 1.-(100-percentConfidence)/200.
    else: # use Student
         k = round(t.ppf(0.5+percentConfidence/200., nSamples-1)*100)/100.
    e = k*stdev/sqrt(nSamples)
    if printLatex:
        print('${0} \pm {1}\\frac{{{2}}}{{\sqrt{{{3}}}}}$'.format(mean, k, stdev, nSamples))
    return mean-e, mean+e
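
A hypothetical call to confidenceInterval (not in the original source): a 95% interval around a sample mean of 12.3 with standard deviation 2.1 over 30 samples, using the Student multiplier (trueStd=False):

lower, upper = confidenceInterval(12.3, 2.1, 30, 95, trueStd=False)
print(lower, upper)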
Example No. 35
def curve_fit_ci(popt,pcov,n):
    
    alpha = 0.05
    p = len(popt)
    dof = max(0,n-p)
    tval = t.ppf(1.0-alpha/2., dof)

    lower = []
    upper = []
    for i,coef,var in zip(range(p),popt, np.diag(pcov)):
        sigma = var**0.5
        print('p{0}: {1} [{2}  {3}]'.format(i, coef,
                                  coef - sigma/np.sqrt(n)*tval,
                                  coef + sigma/np.sqrt(n)*tval))
        lower.append(coef - sigma/np.sqrt(n)*tval)
        upper.append(coef + sigma/np.sqrt(n)*tval)

    return lower,upper
Example No. 36
def get_accuracies(pars, pcov, ydata):
    alpha = 0.05  # 95% confidence interval = 100*(1-alpha)

    n = len(ydata)  # number of data points
    p = len(pars)  # number of parameters

    dof = max(0, n - p)  # number of degrees of freedom

    # student-t value for the dof and confidence level
    tval = t_stat.ppf(1.0 - alpha / 2., dof)

    sigmas = []

    for i, p, var in zip(range(n), pars, np.diag(pcov)):
        sigma = np.sqrt(var)
        sigmas.append(sigma * tval)

    return sigmas, dof
Example No. 37

def diffuse_fit(x, y, ti, t0):
    dt = ti - t0
    popt, pcov = curve_fit(pde_fun, x, y, p0=[0.2, -1, -1])
    C1, C2, C3 = popt
    n = len(x)  # number of data points
    p = len(pcov)  # number of parameters
    dof = max(0, n - p)  # number of degrees of freedom
    alpha = 0.05
    # student-t value for the dof and confidence level
    tval = t.ppf(1.0 - 0.5 * alpha, dof)
    sigma = np.diag(pcov)**0.5
    moe = tval * sigma
    # uncertainty analysis
    D_2 = -1 / (dt * 4 * C2)
    D_2p = 1 / (dt * 4 * (moe[1] + D_2))
    D_2n = 1 / (dt * 4 * (moe[1] - D_2))
    D_2u = 0.5 * (abs(D_2 - D_2p) + abs(D_2 - D_2n))
    return D_2, D_2u, C1, C2, C3
Example No. 38
def dict_recur_mean_err(dlist):
    """
    Accepts list of nested dictionaries and produces a single
    dictionary containing mean values and estimated errors
    from these dictionaries. Errors are estimated as confidence
    intervals lengths.
    """
    if isinstance(dlist[0], dict):
        res_dict = {}
        for k in dlist[0]:
            n_dlist = [d[k] for d in dlist]
            res_dict[k] = dict_recur_mean_err(n_dlist)
        return res_dict
    else:
        n = len(dlist)
        mean = float(sum(dlist)) / n
        variance = float(sum(map(lambda xi: (xi-mean)**2, dlist))) / n
        std = math.sqrt(variance)
        err = t.ppf(1-alpha/2.,n-1) * std / math.sqrt(n-1)
        return (mean, err)
Example No. 39
def confidenceFit(func,x,y, initial_guess, alpha=0.1,maxfev=800):

	# alpha sets the confidence level: the interval is 100*(1-alpha)%, e.g. take alpha=0.02 for 98%
	pars, pcov = curve_fit(func, x, y, p0=initial_guess,maxfev=maxfev)



	n = len(y)    # number of data points
	p = len(pars) # number of parameters

	dof = max(0, n - p) # number of degrees of freedom
	#print pcov
	# student-t value for the dof and confidence level
	tval = t.ppf(1.0-alpha/2., dof) 
	#std from covariance	
	sigma = np.diag(pcov)**0.5
	lower_bound = pars - sigma*tval
	upper_bound = pars + sigma*tval

	return pars,lower_bound,upper_bound,sigma,tval
Example No. 40
def simple_confint(yy, popt, pcov):
    alpha = 0.05
    n = len(yy)
    p = len(popt)

    dof = max(0, n - p)

    tval = t.ppf(1.0 - alpha / 2., dof)
    cf = np.zeros((p, 2))  # one (lower, upper) row per parameter

    for i, p, var in zip(range(n), popt, np.diag(pcov)):
        sigma = var**0.5
        cf[i, 0] = p - sigma * tval
        cf[i, 1] = p + sigma * tval
        '''print('p{0}: {1} [{2}  {3}]' .format(i, p,
                                                p - sigma*tval,
                                                p + sigma*tval))
                                                '''

    return cf
Example No. 41
def confidenceInterval(mean,
                       stdev,
                       nSamples,
                       percentConfidence,
                       trueStd=True,
                       printLatex=False):
    '''if trueStd, use normal distribution, otherwise, Student

    Use otherwise t.interval or norm.interval
    ex: norm.interval(0.95, loc = 0., scale = 2.3/sqrt(11))
    t.interval(0.95, 10, loc=1.2, scale = 2.3/sqrt(nSamples))
    loc is mean, scale is sigma/sqrt(n) (for Student, 10 is df)'''
    from scipy.stats.distributions import norm, t
    if trueStd:
        k = round(norm.ppf(0.5 + percentConfidence / 200., 0, 1), 2)
    else:  # use Student
        k = round(t.ppf(0.5 + percentConfidence / 200., nSamples - 1), 2)
    e = k * stdev / sqrt(nSamples)
    if printLatex:
        print('${0} \pm {1}\\frac{{{2}}}{{\sqrt{{{3}}}}}$'.format(
            mean, k, stdev, nSamples))
    return mean - e, mean + e
Example No. 42
def trimmed_mean_ci(data, proportiontocut=0.2, alpha=0.05, axis=None):
    """Returns the selected confidence interval of the trimmed mean along the
    given axis.
    
:Inputs:
    data : sequence
        Input data. The data is transformed to a masked array
    proportiontocut : float *[0.2]*
        Proportion of the data to cut from each side of the data . 
        As a result, (2*proportiontocut*n) values are actually trimmed.
    alpha : float *[0.05]*
        Confidence level of the intervals
    axis : integer *[None]*
        Axis along which to cut.
    """
    data = masked_array(data, copy=False)
    trimmed = trim_both(data, proportiontocut=proportiontocut, axis=axis)
    tmean = trimmed.mean(axis)
    tstde = trimmed_stde(data, proportiontocut=proportiontocut, axis=axis)
    df = trimmed.count(axis) - 1
    tppf = t.ppf(1-alpha/2.,df)
    return numpy.array((tmean - tppf*tstde, tmean+tppf*tstde))
Example No. 43

                                      maxfev=4000,
                                      ftol=1e-6)

print('Fitted parameters=', fitParams)

##--------------t-test
alpha = 0.05  # 95% confidence interval = 100*(1-alpha)

n = len(y)  # number of data points
p = len(fitParams)  # number of parameters

dof = max(0, n - p)  # number of degrees of freedom

## student-t value for the dof and confidence level
##https://stackoverflow.com/questions/21494141/how-do-i-do-a-f-test-in-python
tval = t.ppf(1.0 - alpha / 2., dof)
#
for i, p, var in zip(range(n), fitParams, np.diag(fitCovariances)):
    sigma = var**0.5
    print('p{0}: {1} [{2}  {3}]'.format(i, p, p - sigma * tval,
                                        p + sigma * tval))

#Mutual information
# http://scikit-learn.org/stable/auto_examples/feature_selection/plot_f_test_vs_mi.html

#F-statistics tutorial
#http://www.statisticshowto.com/f-statistic/

#predicting with fitted function
A = x.T
ypred = fitFunc(A, fitParams[0], fitParams[1], fitParams[2], fitParams[3],
Example No. 44
#!/usr/bin/env python
import numpy as np
from scipy.stats.distributions import  t
n = 10  # number of measurements
dof = n - 1  # degrees of freedom
avg_x = 16.1  # average measurement
std_x = 0.01  # standard deviation of measurements
# Find 95% prediction interval for next measurement
alpha = 1.0 - 0.95
pred_interval = t.ppf(1 - alpha / 2., dof) * std_x * np.sqrt(1. + 1. / n)
s = ['We are 95% confident the next measurement',
       ' will be between {0:1.3f} and {1:1.3f}']
print(''.join(s).format(avg_x - pred_interval, avg_x + pred_interval))
Example No. 45

    x2_big_sample += var(bags_for_big_samples[:number_of_sample], ddof=1)

small_sample_average = x_small_sample / number_of_simulation
medium_sample_average = x_medium_sample / number_of_simulation
big_sample_average = x_big_sample / number_of_simulation

small_sample_variance = x2_small_sample / number_of_simulation / number_of_sample
medium_sample_variance = x2_medium_sample / number_of_simulation / number_of_sample
big_sample_variance = x2_big_sample / number_of_simulation / number_of_sample


print("Small Sample : $\mu$ = " + str(average(small_sample_average)) + ", $\sigma^2$ " + str(average(small_sample_variance)))
print("medium Sample : $\mu$ = " + str(average(medium_sample_average)) + ", $\sigma^2$ " + str(average(medium_sample_variance)))
print("big Sample : $\mu$ = " + str(average(big_sample_average)) + ", $\sigma^2$ " + str(average(big_sample_variance)))

quantile = t.ppf(0.975, number_of_sample-1, loc=0, scale=1)

# print("IC for Small Sample :" + str(quantile*sqrt(small_sample_variance*(1-3/2400))/small_sample_average*100) + "%")
# print("IC for medium Sample :" + str(quantile*sqrt(medium_sample_variance*(1-24/2400))/medium_sample_average*100) + "%")
# print("IC for big Sample :" + str(quantile*sqrt(big_sample_variance*(1-60/2400))/big_sample_average*100) + "%")


total_tank_average_using_small_samples = number_of_small_samples * small_sample_average
total_tank_average_using_medium_samples = number_of_medium_samples * medium_sample_average
total_tank_average_using_big_samples = number_of_big_samples * big_sample_average
total_tank_variance_using_small_samples = number_of_small_samples**2 * small_sample_variance
total_tank_variance_using_medium_samples = number_of_medium_samples**2 * medium_sample_variance
total_tank_variance_using_big_samples = number_of_big_samples**2 * big_sample_variance

print("Total Tank estimation using Small Sample : $\mu$ = " + str(total_tank_average_using_small_samples) + ", $\sigma^2$ " + str(total_tank_variance_using_small_samples))
print("Total Tank estimation using medium Sample : $\mu$ = " + str(total_tank_average_using_medium_samples) + ", $\sigma^2$ " + str(total_tank_variance_using_medium_samples))
Example No. 46
'''
b =
    4.9671
    2.1100
bint =
    4.6267    5.3075
    1.7671    2.4528
'''
x = np.array([ 0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])
y = np.array([ 4.70192769,  4.46826356,  4.57021389,  4.29240134,  3.88155125,
            3.78382253,  3.65454727,  3.86379487,  4.16428541,  4.06079909])
# this is the function we want to fit to our data
def func(x,c0, c1):
    return c0 * np.exp(-x) + c1*x
pars, pcov = curve_fit(func, x, y, p0=[4.96, 2.11])
alpha = 0.05 # 95% confidence interval
n = len(y)    # number of data points
p = len(pars) # number of parameters
dof = max(0, n-p) # number of degrees of freedom
tval = t.ppf(1.0-alpha/2., dof) # student-t value for the dof and confidence level
for i, p,var in zip(range(n), pars, np.diag(pcov)):
    sigma = var**0.5
    print('c{0}: {1} [{2}  {3}]'.format(i, p,
                                  p - sigma*tval,
                                  p + sigma*tval))
import matplotlib.pyplot as plt
plt.plot(x,y,'bo ')
xfit = np.linspace(0,1)
yfit = func(xfit, pars[0], pars[1])
plt.plot(xfit,yfit,'b-')
plt.legend(['data','fit'],loc='best')
plt.savefig('images/nonlin-fit-ci.png')
print("Small Sample : $\mu$ = " + str(average(small_sample_average)) + ", $\sigma^2$ " + str(average(small_sample_variance)))
print("medium Sample : $\mu$ = " + str(average(medium_sample_average)) + ", $\sigma^2$ " + str(average(medium_sample_variance)))
print("big Sample : $\mu$ = " + str(average(big_sample_average)) + ", $\sigma^2$ " + str(average(big_sample_variance)))

number_of_small_samples = 2400
number_of_medium_samples = 2400/8
number_of_big_samples = 2400/20


total_tank_average_using_small_samples = number_of_small_samples * average(small_sample_average)
total_tank_average_using_medium_samples = number_of_medium_samples * average(medium_sample_average)
total_tank_average_using_big_samples = number_of_big_samples * average(big_sample_average)

total_tank_variance_using_small_samples = number_of_small_samples * average(small_sample_variance)
total_tank_variance_using_medium_samples = number_of_medium_samples * average(medium_sample_variance)
total_tank_variance_using_big_samples = number_of_big_samples * average(big_sample_variance)

print("Total Tank estimation using Small Sample : $\mu$ = " + str(total_tank_average_using_small_samples) + ", $\sigma^2$ " + str(total_tank_variance_using_small_samples))
print("Total Tank estimation using medium Sample : $\mu$ = " + str(total_tank_average_using_medium_samples) + ", $\sigma^2$ " + str(total_tank_variance_using_medium_samples))
print("Total Tank estimation using big Sample : $\mu$ = " + str(total_tank_average_using_big_samples) + ", $\sigma^2$ " + str(total_tank_variance_using_big_samples))

quantile = t.ppf(0.975, 2, loc=0, scale=1)

correction_factor_for_small_samples = 1

print("Total Tank estimation IC using Small Sample : " + str(quantile*sqrt(total_tank_variance_using_small_samples)/total_tank_average_using_small_samples*100) + "%")
print("Total Tank estimation IC using medium Sample : " + str(quantile*sqrt(total_tank_variance_using_medium_samples)/total_tank_average_using_medium_samples*100) + "%")
print("Total Tank estimation IC using big Sample : " + str(quantile*sqrt(total_tank_variance_using_big_samples)/total_tank_average_using_big_samples*100) + "%")

Example No. 48
def _calc_ajusted_recomb(dists, recombs, max_recomb, max_zero_dist_recomb,
                         alpha_recomb_0, plot_fhand=None):
    # first rough interpolation
    # we remove the physical distances with high recombination rates because
    # they're not very informative. e.g. more than 40 cM will not discriminate
    # between false recombination due to hidden segregation in the parents and
    # true recombination

    if plot_fhand:
        fig = Figure()
        axes = fig.add_subplot(111)
        axes.set_axis_bgcolor((1, 0.6, 0.6))
        axes.scatter(dists, recombs, c='r', label='For 1st fit')
    else:
        axes = None
        fig = None

    dists = numpy.array(dists)
    recombs = numpy.array(recombs)
    recomb_rate = 1e-7
    popt, pcov = _fit_kosambi(dists, recombs, init_params=[recomb_rate, 0])
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason': '1st fit failed'}

    est_dists = dists
    est_recombs = _kosambi(est_dists, popt[0], popt[1])

    if fig:
        axes.plot(est_dists, est_recombs, label='1st fit', c='r')

    # now we perform a second fit but only with those markers that are a
    # distance that results in a recombination fraction lower than max_recomb
    close_markers = est_recombs < max_recomb
    close_recombs = recombs[close_markers]
    close_dists = dists[close_markers]

    if plot_fhand:
        axes.scatter(close_dists, close_recombs, c='b', label='For 2nd fit')

    if len(close_dists) < 1:
        # This marker is so bad that their closest markers are at a large
        # distance
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': 'no close region left'}

    if len(close_dists) != len(dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(close_dists, close_recombs, init_params=popt)
    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': '2nd fit failed'}

    est_close_recombs = _kosambi(close_dists, popt[0], popt[1])

    residuals = close_recombs - est_close_recombs
    if fig:
        axes.plot(close_dists, est_close_recombs, c='b', label='2nd_fit')

    # we exclude the markers with a residual outlier
    quartile_25, quartile_75 = numpy.percentile(residuals, [25, 75])
    iqr = quartile_75 - quartile_25
    outlayer_thrld = [quartile_25 - iqr * 1.5, quartile_75 + iqr * 1.5]
    ok_markers = [idx for idx, res in enumerate(residuals) if (not isnan(res) and (outlayer_thrld[0] < res < outlayer_thrld[1]))]
    ok_recombs = close_recombs[ok_markers]
    ok_dists = close_dists[ok_markers]

    if fig:
        axes.scatter(ok_dists, ok_recombs, c='g', label='For 3rd fit')

    if len(ok_dists) != len(close_dists):
        # If we've removed any points we fit again
        popt, pcov = _fit_kosambi(ok_dists, ok_recombs, init_params=popt)
    var_recomb_at_dist_0 = pcov[1, 1]
    recomb_at_dist_0 = popt[1]
    ok_color = (0.3, 1, 0.6)
    if isinf(var_recomb_at_dist_0):
        conf_interval = None
        if abs(recomb_at_dist_0) < 0.01:
            # recomb is 0 for all points and the variance is inf
            snp_ok = True
        else:
            snp_ok = False
    else:
        if alpha_recomb_0 is None:
            conf_interval = None
            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            else:
                snp_ok = False
        else:
            num_data_points = len(ok_dists)
            num_params = len(popt)
            deg_of_freedom = max(0, num_data_points - num_params)
            tval = t.ppf(1.0 - alpha_recomb_0 / 2., deg_of_freedom)
            std_dev = var_recomb_at_dist_0 ** 0.5
            conf_interval = (recomb_at_dist_0 - std_dev * tval,
                             recomb_at_dist_0 + std_dev * tval)

            if abs(recomb_at_dist_0) <= max_zero_dist_recomb:
                snp_ok = True
                ok_color = (0.3, 1, 0.3)
            elif conf_interval[0] < 0 < conf_interval[1]:
                snp_ok = True
            else:
                snp_ok = False
            if plot_fhand:
                axes.vlines(0, conf_interval[0], conf_interval[1],
                            label='conf. interval')

    if plot_fhand:
        color = ok_color if snp_ok else (1, 0.3, 0.3)
        axes.set_axis_bgcolor(color)

    if popt is None:
        _print_figure(axes, fig, plot_fhand)
        return None, False, {'kosambi_fit_ok': False,
                             'reason_no_fit': '3rd fit failed'}

    est2_recombs = _kosambi(ok_dists, popt[0], popt[1])

    if fig:
        axes.plot(ok_dists, est2_recombs, c='g', label='3rd_fit')
        _print_figure(axes, fig, plot_fhand)
    return recomb_at_dist_0, snp_ok, {'kosambi_fit_ok': True,
                                      'conf_interval': conf_interval}
Example No. 49
def get_eos(self, static=False):
    '''calculate the equation of state for the attached atoms.

    Returns a dictionary of data for each step. You do not need to
    specify any relaxation parameters, only the base parameters for the
    calculations. Writes to eos.org with a report of output.

    if static is True, then run a final static calculation at high
    precision, with ismear=-5.
    '''

    # this returns if the data exists.
    if os.path.exists('eos.json'):
        with open('eos.json') as f:
            return json.loads(f.read())

    # we need an initial calculation to get us going.
    self.calculate()

    cwd = os.getcwd()
    data = {'cwd': os.getcwd()}  # dictionary to store results in

    org = []  # list of strings to make the org-file report
    org += ['#+STARTUP: showeverything']
    org += ['* Initial guess']
    org += [str(self)]
    org += ['',
            '[[shell:jaspsum -p {0}][view initial guess]]'.format(cwd)]

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    atoms = self.get_atoms()
    original_atoms = atoms.copy()  # save for comparison later.
    v_init = atoms.get_volume()

    # ############################################################
    # ## Step 1
    # ############################################################
    org += ['* step 1 - relax ions and shape']
    volumes1, energies1 = [], []
    ready = True
    factors = [-0.15, -0.07, 0.0, 0.07, 0.15]
    for i, f in enumerate(factors):
        wd = cwd + '/step-1/f-{0}'.format(i)
        self.clone(wd)

        with jasp(wd,
                  isif=4,
                  ibrion=2,
                  ediffg=-0.05, ediff=1e-6,
                  nsw=50,
                  atoms=atoms) as calc:
            try:
                # add org-link to calculation
                org += ['[[shell:jaspsum {0}][{0}]]'.format(wd)]

                atoms.set_volume(v_init * (1 + f))
                volumes1.append(atoms.get_volume())
                energies1.append(atoms.get_potential_energy())
                calc.strip()

            except (VaspSubmitted, VaspQueued):
                ready = False

    if not ready:
        log.info('Step 1 is still running')
        raise VaspRunning

    data['step1'] = {}
    data['step1']['volumes'] = volumes1
    data['step1']['energies'] = energies1
    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    # create an org-table of the data.
    org += ['',
            '#+tblname: step1',
            '| volume (A^3) | Energy (eV) |',
            '|-']
    for v, e in zip(volumes1, energies1):
        org += ['|{0}|{1}|'.format(v, e)]
    org += ['']

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    eos1 = EquationOfState(volumes1, energies1)

    try:
        v1, e1, B1 = eos1.fit()
    except Exception:
        with open('error', 'w') as f:
            f.write('Error fitting the equation of state')
        raise

    data['step1']['eos'] = (v1, e1, B1)
    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    # create a plot
    f = eos1.plot(show=False)
    f.subplots_adjust(left=0.18, right=0.9, top=0.9, bottom=0.15)
    plt.xlabel(r'volume ($\AA^3$)')
    plt.ylabel('energy (eV)')
    plt.title(r'E: %.3f eV, V: %.3f $\AA^3$, B: %.3f GPa' %
              (e1, v1, B1 / GPa))

    plt.text(eos1.v0, max(eos1.e), 'EOS: %s' % eos1.eos_string)
    f.savefig('eos-step1.png')

    org += ['[[./eos-step1.png]]',
            '']

    min_energy_index = np.argmin(energies1)

    if min_energy_index in (0, len(energies1) - 1):
        log.warn('Your minimum energy is at an endpoint. '
                 'This indicates something is wrong.')

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))
    # ########################################################
    # #  STEP 2
    # ########################################################
    # step 2 - isif=4, ibrion=1. now we allow the shape of each cell to
    # change, and we use the best guess from step 1 for minimum volume.
    ready = True
    volumes2, energies2 = [], []
    factors = [-0.09, -0.06, -0.03, 0.0, 0.03, 0.06, 0.09]

    org += ['* step 2 - relax ions and shape with improved minimum estimate']

    for i, f in enumerate(factors):
        wd = cwd + '/step-2/f-{0}'.format(i)

        # clone closest result from above.
        with jasp('step-1/f-{0}'.format(min_energy_index)) as calc:
            calc.clone(wd)

        with jasp(wd,
                  isif=4,
                  ibrion=1,
                  nsw=50) as calc:
            try:
                atoms = calc.get_atoms()
                atoms.set_volume(v1 * (1 + f))

                volumes2 += [atoms.get_volume()]
                energies2 += [atoms.get_potential_energy()]
                calc.strip()
            except (VaspSubmitted, VaspQueued):
                ready = False

    if not ready:
        log.info('Step 2 is still running')
        raise VaspRunning

    # update org and json files.
    data['step2'] = {}
    data['step2']['volumes'] = volumes2
    data['step2']['energies'] = energies2
    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    # create an org-table of the data.
    org += ['',
            '#+tblname: step2',
            '| volume (A^3) | Energy (eV) |',
            '|-']
    for v, e in zip(volumes2, energies2):
        org += ['|{0}|{1}|'.format(v, e)]
    org += ['']

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    eos2 = EquationOfState(volumes2, energies2)
    try:
        v2, e2, B2 = eos2.fit()
    except Exception:
        with open('error', 'w') as f:
            f.write('Error fitting the equation of state')
        raise

    data['step2']['eos'] = (v2, e2, B2)
    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    f = eos2.plot(show=False)
    f.subplots_adjust(left=0.18, right=0.9, top=0.9, bottom=0.15)
    plt.xlabel(r'volume ($\AA^3$)')
    plt.ylabel('energy (eV)')
    plt.title(r'E: %.3f eV, V: %.3f $\AA^3$, B: %.3f GPa' %
              (e2, v2, B2 / GPa))

    plt.text(eos2.v0, max(eos2.e), 'EOS: %s' % eos2.eos_string)
    f.savefig('eos-step2.png')

    org += [
        '[[./eos-step2.png]]',
        '']
    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    # statistical analysis of the equation of state
    EOS = ['sjeos',
           'taylor',
           'murnaghan',
           'birch',
           'birchmurnaghan',
           'pouriertarantola',
           'vinet']

    from ase.units import kJ
    Vs, Es, Bs = [], [], []
    for label in EOS:
        eos = EquationOfState(volumes2, energies2, eos=label)
        try:
            v, e, B = eos.fit()
            Vs += [v]
            Es += [e]
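            # ASE's EquationOfState returns B in eV/Angstrom^3; the factor
            # 1e24 / ase.units.kJ (about 160.2) converts that to GPa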
            Bs += [B / kJ * 1.0e24]  # GPa
        except Exception:
            with open('error', 'w') as f:
                f.write('Error fitting the '
                        'equation of state {0}'.format(label))

    avgV = np.mean(Vs)
    stdV = np.std(Vs)

    avgE = np.mean(Es)
    stdE = np.std(Es)

    avgB = np.mean(Bs)
    stdB = np.std(Bs)

    from scipy.stats.distributions import t
    n = len(Vs)
    dof = n - 1
    alpha = 0.05

    Vconf = t.ppf(1 - alpha/2., dof) * stdV * np.sqrt(1 + 1./n)
    Bconf = t.ppf(1 - alpha/2., dof) * stdB * np.sqrt(1 + 1./n)
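    # scatter of the n EOS fits: t-quantile times the sample standard deviation,
    # widened by sqrt(1 + 1/n) so the interval covers a single new estimate
    # (a prediction-style interval) rather than only the mean of the fits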

    data['step2']['avgV'] = avgV
    data['step2']['Vconf95'] = Vconf
    data['step2']['avgB'] = avgB
    data['step2']['Bconf95'] = Bconf

    org += ['** Statistical analysis',
            '''
Volume = {avgV:1.3f} \pm {Vconf:1.3f} \AA^3 at the 95% confidence level
B = {avgB:1.0f} \pm {Bconf:1.0f} GPa at the 95% confidence level
'''.format(**locals())]

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    # step 3 should be isif = 3 where we let the volume change too
    # start from the minimum in step2

    org += ['* step 3 - relax volume']
    emin_ind = np.argmin(energies2)
    log.info('Minimum energy found in factor={0}.'.format(factors[emin_ind]))

    with jasp('step-2/f-{0}'.format(emin_ind)) as calc:
        calc.clone('step-3')

    with jasp('step-3',
              isif=3,  # vol, shape and internal degrees of freedom
              ibrion=1,
              prec='high',
              nsw=50) as calc:
        atoms = calc.get_atoms()
        atoms.set_volume(avgV)
        calc.calculate()
        calc.strip()

        org += [str(calc)]

        atoms = calc.get_atoms()
        data['step3'] = {}
        data['step3']['potential_energy'] = atoms.get_potential_energy()
        data['step3']['volume'] = atoms.get_volume()

    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    # now the final step with ismear=-5 for the accurate energy. This
    # is recommended by the VASP manual. We only do this if you
    # specify static=True as an argument
    if static:
        with jasp('step-3') as calc:
            calc.clone('step-4')
        with jasp('step-4',
                  isif=None, ibrion=None, nsw=None,
                  icharg=2,  # do not reuse charge
                  istart=1,
                  prec='high',
                  ismear=-5) as calc:
            calc.calculate()
            atoms = calc.get_atoms()

            data['step4'] = {}
            data['step4']['potential_energy'] = atoms.get_potential_energy()

            org += ['* step-4 - static calculation',
                    str(calc)]

    # final write out
    with open('eos.org', 'w') as f:
        f.write('\n'.join(org))

    # dump data to a json file
    with open('eos.json', 'w') as f:
        f.write(json.dumps(data))

    return data
Ejemplo n.º 50
0
            print(popt[2])
 
        #np.savetxt(f+'_fit.csv', popt, delimiter = ',')
        #np.savetxt(f+'_fit_cov.csv', popt, delimiter = ',')
        
        # from http://kitchingroup.cheme.cmu.edu/blog/2013/02/12/Nonlinear-curve-fitting-with-parameter-confidence-intervals/

            alpha = 0.05 # 95% confidence interval = 100*(1-alpha)
        
            n = len(ydata)    # number of data points
            p = len(popt) # number of parameters
            
            dof = max(0, n - p) # number of degrees of freedom
            
            # student-t value for the dof and confidence level
            tval = t.ppf(1.0-alpha/2., dof)
            params = []
            for i, p,var in zip(range(n), popt, np.diag(pcov)):
                sigma = var**0.5
                params.append('p{0}: {1} [{2}  {3}]'.format(i, p,p - sigma*tval,p + sigma*tval))
            print(params)
            with open(f+'_fit_param_confidence_int.txt', 'w') as newfile:
                newfile.write('\n'.join(params))                    
            fit = func(xdata, popt[0], popt[1], popt[2])
            plt.figure(0)            
            plt.plot(xdata, fit, 'g')
            plt.figure(1)            
            plt.plot(xdata, fit, 'g')
            laserpowernum = float(f[f.index(' _') + 2:f.index('J')]) * 0.125167 / 1e-6  # in microJ
            laserpower = format(laserpowernum, '.0f')  # in microJ
            powerlist.append(laserpower)
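The fragment above repeats the same interval calculation for every fitted parameter. A small self-contained helper doing the equivalent (illustrative names; a sketch of the same t-based interval, not code from the example):

import numpy as np
from scipy.stats.distributions import t

def format_param_intervals(popt, pcov, n_points, alpha=0.05):
    """Return one 'p{i}: value [lower  upper]' line per fitted parameter."""
    dof = max(0, n_points - len(popt))
    tval = t.ppf(1.0 - alpha / 2.0, dof)
    lines = []
    for i, (value, variance) in enumerate(zip(popt, np.diag(pcov))):
        sigma = np.sqrt(variance)
        lines.append('p{0}: {1} [{2}  {3}]'.format(
            i, value, value - sigma * tval, value + sigma * tval))
    return lines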
Ejemplo n.º 51
0
    def processFile(self,fullPath):
        fileName, fileExtension = os.path.splitext(fullPath)
        fileName = os.path.basename(fullPath)
        self.fileNames.append(fileName)
        if fileExtension == '.csv':
            delimiter = ','
        else:
            delimiter = None

        self.ui.statusbar.showMessage("processing: "+ fileName,2500)
        
        #wait here for the file to be completely written to disk and closed before trying to read it

        fi = QFileInfo(fullPath)
        while not fi.isWritable():
            time.sleep(0.001)
            fi.refresh()
        
        fp = open(fullPath, mode='r')
        fileBuffer = fp.read()
        fp.close()
        first10 = fileBuffer[0:10]
        nMcHeaderLines = 25 #number of header lines in mcgehee IV file format
        isMcFile = False #true if this is a McGehee iv file format
        if ('#' not in first10) and ('/' in first10) and ('\t' in first10):  # the first line is not a comment
            # the first 10 chars contain no comment symbol but do contain '/' and a tab,
            # so it's safe to assume the McGehee IV file format
            isMcFile = True
            # comment out the first 25 header rows
            fileBuffer = '#' + fileBuffer
            fileBuffer = fileBuffer.replace('\n', '\n#', nMcHeaderLines - 1)

        splitBuffer = fileBuffer.splitlines(True)
        
        
        area = 1
        noArea = True
        vsTime = False #this is not an i,v vs t data file
        #extract header lines and search for area
        header = []
        for line in splitBuffer:
            if line.startswith('#'):
                header.append(line)
                if 'Area' in line:
                    area = float(line.split(' ')[3])
                    noArea = False
                if 'I&V vs t' in line:
                    if float(line.split(' ')[5]) == 1:
                        vsTime = True
            else:
                break
        
        outputScaleFactor = np.array(1000/area) #for conversion to [mA/cm^2]

        tempFile = QTemporaryFile()
        tempFile.open()
        tempFile.writeData(fileBuffer)
        tempFile.flush()

        #read in data
        try:
            data = np.loadtxt(str(tempFile.fileName()),delimiter=delimiter)
            VV = data[:,0]
            II = data[:,1]
            if vsTime:
                # use a name that does not shadow the time module (time.sleep is used above)
                timeData = data[:,2]
        except Exception:
            self.ui.statusbar.showMessage('Could not read ' + fileName + '. Prepend # to all non-data lines and try again', 2500)
            return
        tempFile.close()
        tempFile.remove()

        
        if isMcFile: #convert to amps
            II = II/1000*area

        if not vsTime:
            #sort data by ascending voltage
            newOrder = VV.argsort()
            VV=VV[newOrder]
            II=II[newOrder]
            #remove duplicate voltage entries
            VV, indices = np.unique(VV, return_index =True)
            II = II[indices]
        else:
            #sort data by ascending time
            newOrder = timeData.argsort()
            VV = VV[newOrder]
            II = II[newOrder]
            timeData = timeData[newOrder]
            timeData = timeData - timeData[0]  # start time at t=0

        #catch and fix flipped current sign:
        if II[0] < II[-1]:
            II = II * -1       

        indexInQuad1 = np.logical_and(VV>0,II>0)
        if any(indexInQuad1): #enters statement if there is at least one datapoint in quadrant 1
            isDarkCurve = False
        else:
            self.ui.statusbar.showMessage("Dark curve detected",500)
            isDarkCurve = True
        
        #put items in table
        self.ui.tableWidget.insertRow(self.rows)
        for ii in range(len(self.cols)):
            self.ui.tableWidget.setItem(self.rows,ii,QTableWidgetItem())        
        
        if not vsTime:
            fitParams, fitCovariance, infodict, errmsg, ier = self.bestEffortFit(VV,II)
        
            #print errmsg
    
            I0_fit = fitParams[0]
            Iph_fit = fitParams[1]
            Rs_fit = fitParams[2]
            Rsh_fit = fitParams[3]
            n_fit = fitParams[4]
    
            
            #0 -> LS-straight line
            #1 -> cubic spline interpolant
            smoothingParameter = 1-2e-6
            iFitSpline = SmoothSpline(VV, II, p=smoothingParameter)
    
            def cellModel(voltageIn):
                #voltageIn = np.array(voltageIn)
                return vectorizedCurrent(voltageIn, I0_fit, Iph_fit, Rs_fit, Rsh_fit, n_fit)
    
            def invCellPowerSpline(voltageIn):
                if voltageIn < 0:
                    return 0
                else:
                    return -1*voltageIn*iFitSpline(voltageIn)
    
            def invCellPowerModel(voltageIn):
                if voltageIn < 0:
                    return 0
                else:
                    return -1*voltageIn*cellModel(voltageIn)
    
            if not isDarkCurve:
                VVq1 = VV[indexInQuad1]
                IIq1 = II[indexInQuad1]
                vMaxGuess = VVq1[np.array(VVq1*IIq1).argmax()]
                powerSearchResults = optimize.minimize(invCellPowerSpline,vMaxGuess)
                #catch a failed max power search:
                if powerSearchResults.status != 0:
                    print("power search exit code = " + str(powerSearchResults.status))
                    print(powerSearchResults.message)
                    vMax = nan
                    iMax = nan
                    pMax = nan
                else:
                    vMax = powerSearchResults.x[0]
                    iMax = iFitSpline([vMax])[0]
                    pMax = vMax*iMax                
    
                #only do this stuff if the char eqn fit was good
                if ier < 5:
                    powerSearchResults_charEqn = optimize.minimize(invCellPowerModel,vMaxGuess)
                    #catch a failed max power search:
                    if powerSearchResults_charEqn.status != 0:
                        print("power search exit code = " + str(powerSearchResults_charEqn.status))
                        print(powerSearchResults_charEqn.message)
                        vMax_charEqn = nan
                    else:
                        vMax_charEqn = powerSearchResults_charEqn.x[0]
                    # open-circuit voltage from the characteristic equation (root of the diode model)
                    try:
                        Voc_nn_charEqn = optimize.brentq(cellModel, VV[0], VV[-1])
                    except Exception:
                        Voc_nn_charEqn = nan
                else:
                    Voc_nn_charEqn = nan
                    vMax_charEqn = nan
    
    
                try:
                    Voc_nn = optimize.brentq(iFitSpline, VV[0], VV[-1])
                except Exception:
                    Voc_nn = nan
    
            else:
                Voc_nn = nan
                vMax = nan
                iMax = nan
                pMax = nan
                Voc_nn_charEqn = nan
                vMax_charEqn = nan
                iMax_charEqn = nan
                pMax_charEqn = nan
    
    
    
            if ier < 5:
                dontFindBounds = False
                iMax_charEqn = cellModel([vMax_charEqn])[0]
                pMax_charEqn = vMax_charEqn*iMax_charEqn
                Isc_nn_charEqn = cellModel(0)
                FF_charEqn = pMax_charEqn/(Voc_nn_charEqn*Isc_nn_charEqn)
            else:
                dontFindBounds = True
                iMax_charEqn = nan
                pMax_charEqn = nan
                Isc_nn_charEqn = nan
                FF_charEqn = nan
    
            #there is a maddening bug in SmoothingSpline: it can't evaluate 0 alone, so I have to do this:
            try:
                Isc_nn = iFitSpline([0,1e-55])[0]
            except Exception:
                Isc_nn = nan
    
            FF = pMax/(Voc_nn*Isc_nn)
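            # (FF is the fill factor: the spline-based maximum power normalised
            # by the Voc * Isc product)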
    
            if (ier != 7) and (ier != 6) and (not dontFindBounds) and (type(fitCovariance) is not float):
                #error estimation:
                alpha = 0.05 # 95% confidence interval = 100*(1-alpha)
    
                nn = len(VV)    # number of data points
                p = len(fitParams) # number of parameters
    
                dof = max(0, nn - p) # number of degrees of freedom
    
                # student-t value for the dof and confidence level
                tval = t.ppf(1.0-alpha/2., dof) 
    
                lowers = []
                uppers = []
                #calculate 95% confidence interval
                for param, variance in zip(fitParams, np.diag(fitCovariance)):
                    sigma = variance**0.5
                    lowers.append(param - sigma*tval)
                    uppers.append(param + sigma*tval)
    
            else:
                uppers = [nan,nan,nan,nan,nan]
                lowers = [nan,nan,nan,nan,nan]
    
            plotPoints = 1000
            fitX = np.linspace(VV[0],VV[-1],plotPoints)
            
            if ier < 5:
                modelY = cellModel(fitX)*outputScaleFactor
            else:
                modelY = np.empty(plotPoints)*nan
            splineY = iFitSpline(fitX)*outputScaleFactor
            graphData = {'vsTime':vsTime,'origRow':self.rows,'fitX':fitX,'modelY':modelY,'splineY':splineY,'i':II*outputScaleFactor,'v':VV,'Voc':Voc_nn,'Isc':Isc_nn*outputScaleFactor,'Vmax':vMax,'Imax':iMax*outputScaleFactor}		
    
            #export button
            exportBtn = QPushButton(self.ui.tableWidget)
            exportBtn.setText('Export')
            exportBtn.clicked.connect(self.handleButton)
            self.ui.tableWidget.setCellWidget(self.rows,self.cols.keys().index('exportBtn'), exportBtn)
              
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pce')).setData(Qt.DisplayRole,round(pMax/area*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pce')).setToolTip(str(round(pMax_charEqn/area*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pmax')).setData(Qt.DisplayRole,round(pMax/area*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pmax')).setToolTip(str(round(pMax_charEqn/area*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('jsc')).setData(Qt.DisplayRole,round(Isc_nn/area*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('jsc')).setToolTip(str(round(Isc_nn_charEqn/area*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('voc')).setData(Qt.DisplayRole,round(Voc_nn*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('voc')).setToolTip(str(round(Voc_nn_charEqn*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('ff')).setData(Qt.DisplayRole,round(FF,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('ff')).setToolTip(str(round(FF_charEqn,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rs')).setData(Qt.DisplayRole,round(Rs_fit*area,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rs')).setToolTip('[{0}  {1}]'.format(lowers[2]*area, uppers[2]*area))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rsh')).setData(Qt.DisplayRole,round(Rsh_fit*area,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rsh')).setToolTip('[{0}  {1}]'.format(lowers[3]*area, uppers[3]*area))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('jph')).setData(Qt.DisplayRole,round(Iph_fit/area*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('jph')).setToolTip('[{0}  {1}]'.format(lowers[1]/area*1e3, uppers[1]/area*1e3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('j0')).setData(Qt.DisplayRole,round(I0_fit/area*1e9,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('j0')).setToolTip('[{0}  {1}]'.format(lowers[0]/area*1e9, uppers[0]/area*1e9))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('n')).setData(Qt.DisplayRole,round(n_fit,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('n')).setToolTip('[{0}  {1}]'.format(lowers[4], uppers[4]))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('Vmax')).setData(Qt.DisplayRole,round(vMax*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('Vmax')).setToolTip(str(round(vMax_charEqn*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('area')).setData(Qt.DisplayRole,round(area,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pmax2')).setData(Qt.DisplayRole,round(pMax*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('pmax2')).setToolTip(str(round(pMax_charEqn*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('isc')).setData(Qt.DisplayRole,round(Isc_nn*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('isc')).setToolTip(str(round(Isc_nn_charEqn*1e3,3)))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('iph')).setData(Qt.DisplayRole,round(Iph_fit*1e3,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('iph')).setToolTip('[{0}  {1}]'.format(lowers[1]*1e3, uppers[1]*1e3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('i0')).setData(Qt.DisplayRole,round(I0_fit*1e9,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('i0')).setToolTip('[{0}  {1}]'.format(lowers[0]*1e9, uppers[0]*1e9))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rs2')).setData(Qt.DisplayRole,round(Rs_fit,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rs2')).setToolTip('[{0}  {1}]'.format(lowers[2], uppers[2]))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rsh2')).setData(Qt.DisplayRole,round(Rsh_fit,3))
            self.ui.tableWidget.item(self.rows,self.cols.keys().index('rsh2')).setToolTip('[{0}  {1}]'.format(lowers[3], uppers[3]))
        
        else:#vs time
            graphData = {'vsTime':vsTime,'origRow':self.rows,'time':timeData,'i':II*outputScaleFactor,'v':VV}

        #file name
        self.ui.tableWidget.item(self.rows,self.cols.keys().index('file')).setText(fileName)
        self.ui.tableWidget.item(self.rows,self.cols.keys().index('file')).setToolTip(''.join(header))          
        
        #plot button
        plotBtn = QPushButton(self.ui.tableWidget)
        plotBtn.setText('Plot')
        plotBtn.clicked.connect(self.handleButton)
        self.ui.tableWidget.setCellWidget(self.rows,self.cols.keys().index('plotBtn'), plotBtn)
        self.ui.tableWidget.item(self.rows,self.cols.keys().index('plotBtn')).setData(Qt.UserRole,graphData)
        
        self.ui.tableWidget.resizeColumnsToContents()
        self.rows = self.rows + 1
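The five fitted parameters (I0, Iph, Rs, Rsh, n) point to a single-diode equivalent-circuit model, but vectorizedCurrent and bestEffortFit are defined elsewhere in this class and are not shown. Purely as a reference for what those parameters mean, here is a minimal sketch of the standard single-diode equation solved point by point with a bracketing root finder; the actual helper in the example may be implemented quite differently (for instance via the Lambert W function):

import numpy as np
from scipy import optimize

def single_diode_current(V, I0, Iph, Rs, Rsh, n, T=300.0):
    """Solve Iph - I0*(exp((V + I*Rs)/(n*Vth)) - 1) - (V + I*Rs)/Rsh - I = 0
    for the current I at each voltage point (the equation is implicit in I)."""
    kB = 8.617333262e-5          # Boltzmann constant in eV/K
    Vth = kB * T                 # thermal voltage in volts

    def residual(i, v):
        # clip the exponent so the residual stays finite for wide brackets
        arg = np.clip((v + i * Rs) / (n * Vth), -700.0, 700.0)
        return Iph - I0 * np.expm1(arg) - (v + i * Rs) / Rsh - i

    V = np.atleast_1d(np.asarray(V, dtype=float))
    out = np.empty_like(V)
    for j, v in enumerate(V):
        lo, hi = -abs(Iph) - 1.0, abs(Iph) + 1.0
        # the residual is strictly decreasing in i, so widen the bracket
        # until it changes sign, then solve with Brent's method
        while residual(lo, v) * residual(hi, v) > 0:
            lo, hi = 2.0 * lo, 2.0 * hi
        out[j] = optimize.brentq(residual, lo, hi, args=(v,))
    return out

# e.g. single_diode_current(0.0, 1e-9, 0.02, 1.0, 1e3, 1.5) gives roughly the
# short-circuit current of a cell with 20 mA photocurrent.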