def thompson_tau_test(data, alpha=0.05):
    """Not found in either scipy.stats or statsmddels.
    Uses the Thompson-Tau criteria to iteratively identify outliers until no more exist.

    Parameters
    ----------
    data: list or numpy array, 1-D
        Our dataset we are evaluating for outliers
    alpha: float, default is 0.05
        Our level of significance for detecting outliers

    Returns
    -------
    outlier_table: list
        A list containing every datapoint found to be an outlier by the modified Thompson tau criterion
    """
    data = _check_table(data, only_count=False)
    if alpha < 0 or alpha > 1:
        raise ValueError("Cannot have alpha level greater than 1 or less than 0")
    outlier_exist, outlier_table = True, []
    data_copy = np.copy(data)
    while outlier_exist:
        n, mu, s = len(data_copy), np.mean(data_copy), np.std(data_copy, ddof=1)
        ab_resid = np.abs(data_copy - mu) / s
        rejection = t.isf(alpha / 2, n - 2) * (n - 1) / (sqrt(n) * sqrt(n - 2 + pow(t.isf(alpha / 2, n - 2), 2)))
        is_outlier = ab_resid > rejection
        if np.sum(is_outlier) != 0:
            outlier_table.append(data_copy[np.argsort(ab_resid)][-1:][0])
            data_copy = data_copy[np.argsort(ab_resid)][:-1]
        else:
            outlier_exist = False
    return outlier_table
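# Added usage sketch (not part of the original snippet): synthetic data with one
# planted outlier; assumes numpy as np, scipy.stats.t, math.sqrt/pow and the
# _check_table helper used above are available in this module.
sample = np.append(np.random.default_rng(0).normal(10, 1, 30), 25.0)
print(thompson_tau_test(sample, alpha=0.05))  # 25.0 should be among the flagged points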
def regression_figure(x, y):
    plt.figure()
    plt.scatter(x, y)
    sxy = sum((x - x.mean())* (y - y.mean()))
    sxx = sum((x - x.mean())**2)
    syy = sum((y - y.mean())**2)
    ssr = (sxx*syy - sxy ** 2) / sxx
    # print sxy, sxx, syy, ssr
    b_estimator = sxy / sxx
    a_estimator = y.mean() - b_estimator * x.mean()
    # print b_estimator
    num = len(x)
    alpha = 0.1
    t_distr_value = t.isf(alpha / 2,num - 2)
    print('t value for confidence interval computation: ', t_distr_value)
    interval_left_b = b_estimator - math.sqrt(ssr / ((num - 2) * sxx)) * t_distr_value
    interval_right_b = b_estimator + math.sqrt(ssr / ((num - 2) * sxx)) * t_distr_value
    interval_left_a = a_estimator - math.sqrt(sum(x ** 2) * ssr / (num * (num - 2) * sxx)) * t_distr_value
    interval_right_a = a_estimator + math.sqrt(sum(x ** 2) * ssr / (num * (num - 2) * sxx)) * t_distr_value
    plt.plot(x, a_estimator + b_estimator * x, linewidth = 7)
    plt.plot(x, interval_left_a + interval_left_b * x, linewidth = 1)
    plt.plot(x, interval_right_a + interval_right_b * x, linewidth = 1)
    plt.savefig('assign5_2_plot.pdf')
    
    print('------------------------------------------------')
    # judge hypothesis β = 0 at 1% level of significance
    test_stat = math.sqrt((num - 2) * sxx / ssr) * math.fabs(b_estimator)
    print('value of test statistic: ', test_stat)
    alpha_level = 0.01
    t_distr_value_judge = t.isf(alpha_level / 2, num - 2)
    print('t value for hypothesis test computation: ', t_distr_value_judge)
    if test_stat > t_distr_value_judge:
        print('hypothesis test result: reject')
    else:
        print('hypothesis test result: accept')
def t_val_from_t_percentile(t_percentile, df, one_tailed = 0):
    """
    Find T score given T percentile, DF
    and if its 1 Tailed or 2 Tailed
    t_percentile: % in proportion
    """
    return round(t.isf(t_percentile, df), 3) if one_tailed else round(t.isf(t_percentile/2., df), 3)
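# Added usage sketch (not part of the original snippet; assumes scipy.stats.t is
# imported as above): critical t values at the 0.05 level for 12 degrees of freedom.
print(t_val_from_t_percentile(0.05, 12, one_tailed=1))  # ~1.782
print(t_val_from_t_percentile(0.05, 12))                # ~2.179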
Example #4
def ROC_CI(N, Vec_theta, alpha=0.05):
    """
    One-Dimensional Confidence-Interval Calculations
    Parameters
    ----------
    N
    Vec_theta
    alpha

    Returns
    -------
    theta_L
    theta_U
    """
    theta_L = np.zeros(Vec_theta.size)
    theta_U = np.zeros(Vec_theta.size)
    for i, theta in enumerate(Vec_theta):
        if theta != 0:
            alpha_2 = alpha / 2
        else:
            alpha_2 = alpha

        if N > 100 and theta > 0.1:
            d = N - 1
            sigma = sqrt(theta * (1 - theta))
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = theta - t.isf(alpha_2, df=d) * sigma / sqrt(N)
            theta_U[i] = theta + t.isf(alpha_2, df=d) * sigma / sqrt(N)
        elif N > 100 and theta < 0.1:
            if theta == 0:
                theta_L[i] = 0
            else:
                d_L = 2 * N * theta
                theta_L[i] = chi2.isf(1 - alpha_2, df=d_L) / (2 * N)
            d_U = 2 * (N * theta + 1)
            theta_U[i] = chi2.isf(alpha_2, df=d_U) / (2 * N)
        else:
            d1L = N - N * theta + 1
            d2L = N * theta
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = d2L / (d2L +
                                    d1L * f.isf(alpha_2, 2 * d1L, 2 * d2L))
            d1U = N * theta + 1
            d2U = N - N * theta
            theta_U[i] = d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U) / (
                d2U + d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U))

    # enforce monotonically non-decreasing bounds
    for i in range(Vec_theta.size - 1):
        if theta_L[i + 1] < theta_L[i]:
            theta_L[i + 1] = theta_L[i]
        if theta_U[i + 1] < theta_U[i]:
            theta_U[i + 1] = theta_U[i]

    return theta_L, theta_U
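# Added usage sketch with made-up proportions (not part of the original snippet;
# assumes numpy as np and the scipy.stats chi2, t, f and sqrt imports used above).
theta_hat = np.array([0.0, 0.05, 0.2, 0.5, 0.9])
lower, upper = ROC_CI(N=50, Vec_theta=theta_hat, alpha=0.05)
print(np.round(lower, 3), np.round(upper, 3))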
Example #5
def grubbs(X, test='max', alpha=0.05):
    """
    Performs Grubbs' test for outliers iteratively until the null hypothesis can
    no longer be rejected.
    Parameters
    ----------
    X : ndarray
        A numpy array to be tested for outliers.
    test : str
        Describes the types of outliers to look for. Can be 'min' (look for
        small outliers), 'max' (look for large outliers), or 'two-tailed' (look
        for both).
    alpha : float
        The significance level.
    Returns
    -------
    X : ndarray
        The original array with outliers removed.
    outliers : ndarray
        An array of the outliers that were removed.
    del_index_list : list
        The index of each removed outlier, relative to the array at that removal step.
    """

    Z = zscore(X, ddof=1)  # Z-score
    N = len(X)  # number of samples

    # calculate extreme index and the critical t value based on the test
    if test == 'two-tailed':
        extreme_ix = lambda Z: np.abs(Z).argmax()
        t_crit = lambda N: t.isf(alpha / (2. * N), N - 2)
    elif test == 'max':
        extreme_ix = lambda Z: Z.argmax()
        t_crit = lambda N: t.isf(alpha / N, N - 2)
    elif test == 'min':
        extreme_ix = lambda Z: Z.argmin()
        t_crit = lambda N: t.isf(alpha / N, N - 2)
    else:
        raise ValueError("Test must be 'min', 'max', or 'two-tailed'")

    # compute the threshold
    thresh = lambda N: (N - 1.) / np.sqrt(N) * \
        np.sqrt(t_crit(N)**2 / (N - 2 + t_crit(N)**2))

    # create array to store outliers
    outliers = np.array([])

    # loop through the array and remove any outliers
    del_index_list = []
    while abs(Z[extreme_ix(Z)]) > thresh(N):

        # update the outliers
        outliers = np.r_[outliers, X[extreme_ix(Z)]]
        # remove outlier from array
        X = np.delete(X, extreme_ix(Z))
        del_index_list.append(extreme_ix(Z))
        # repeat Z score
        Z = zscore(X, ddof=1)
        N = len(X)

    return X, outliers, del_index_list
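# Added usage sketch (not part of the original snippet): synthetic data with one
# planted outlier; assumes numpy as np and scipy.stats.zscore / t as used above.
data = np.array([9.8, 10.1, 10.0, 9.9, 10.2, 10.1, 30.0])
cleaned, outs, idx = grubbs(data, test='two-tailed', alpha=0.05)
print(outs)  # expected to contain 30.0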
Example #6
def cochrancox(n1, m1, var1, n2, m2, var2, alpha=0.05):
    """Cochran-Cox approximate t-test for two means with unequal variances.

    Returns the test statistic, the per-sample degrees of freedom, and the
    Cochran-Cox weighted critical value at the two-sided alpha level.
    """
    v1, v2 = n1 - 1, n2 - 1
    _t = (m1 - m2) / np.sqrt(var1 / n1 + var2 / n2)
    _t_av1 = t.isf(alpha / 2, v1)
    _t_av2 = t.isf(alpha / 2, v2)
    var1_SE, var2_SE = var1 / n1, var2 / n2
    _t_a = (var1_SE * _t_av1 + var2_SE * _t_av2) / (var1_SE + var2_SE)
    return _t, (v1, v2), _t_a
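# Added usage sketch with made-up summary statistics (not part of the original
# snippet; assumes numpy as np and scipy.stats.t as above): compare the statistic
# against the weighted critical value to decide the two-sided test.
stat, dfs, crit = cochrancox(n1=12, m1=5.1, var1=2.3, n2=15, m2=4.2, var2=6.8)
print(abs(stat) > crit)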
Example #8
    def linReg(self, alpha=0.05, debug=False):
        '''
        Does linear regression on the model data vs. recorded data.

        Gives a 100(1-alpha)% confidence interval for the slope
        '''
        if debug or self._debug: print "linReg..."
	# set stuff up to make the code cleaner
	obs = self.observed
	mod = self.model
        obs_mean = np.mean(obs)
	mod_mean = np.mean(mod)
	n = mod.size
        df = n - 2

        # calculate square sums
        SSxx = np.sum(mod**2) - np.sum(mod)**2 / n
        SSyy = np.sum(obs**2) - np.sum(obs)**2 / n
        SSxy = np.sum(mod * obs) - np.sum(mod) * np.sum(obs) / n
        SSE = SSyy - SSxy**2 / SSxx
        MSE = SSE / df

        # estimate parameters
        slope = SSxy / SSxx
        intercept = obs_mean - slope * mod_mean
        sd_slope = np.sqrt(MSE / SSxx)
        r_squared = 1 - SSE / SSyy

        # calculate 100(1 - alpha)% CI for slope
        width = t.isf(0.5 * alpha, df) * sd_slope
        lower_bound = slope - width
        upper_bound = slope + width
        slope_CI = (lower_bound, upper_bound)

        # calculate 100(1 - alpha)% CI for intercept
        lower_intercept = obs_mean - lower_bound * mod_mean
        upper_intercept = obs_mean - upper_bound * mod_mean
        intercept_CI = (lower_intercept, upper_intercept)

        # estimate 100(1 - alpha)% CI for predictands
        predictands = slope * mod + intercept
        sd_resid = np.std(obs - predictands)
        y_CI_width = t.isf(0.5 * alpha, df) * sd_resid * \
            np.sqrt(1 - 1 / n)

        # return data in a dictionary
        data = {}
        data['slope'] = slope
        data['intercept'] = intercept
        data['r_2'] = r_squared
        data['slope_CI'] = slope_CI
        data['intercept_CI'] = intercept_CI
        data['pred_CI_width'] = y_CI_width
        data['conf_level'] = 100 * (1 - alpha)

        if debug or self._debug: print "...linReg done."

        return data
Example #10
def bootstrap_estimate(b, n, k):
    s = Sample(k, n, lambda x: x)
    x = [sample_g_function(s) for i in range(b)]
    mu = numpy.mean(x)
    sd = numpy.std(x)
    se = sd / numpy.sqrt(b)
    lci = mu - se * t.isf(0.025, b - 1)
    uci = mu + se * t.isf(0.025, b - 1)
    return (n, mu, sd, lci, uci, numpy.amax(x))
Example #11
 def get_ci(self, alpha=0.95):
     '''
     Returns the confidence interval of the estimated parameters.
     You should call 'fit' before calling this method.
     @keyword alpha: Confidence level (default: 95%)
     '''        
     plo = self.p - t.isf(1. - alpha, self.dof)*np.sqrt(np.diag(self.pcov))
     pup = self.p + t.isf(1. - alpha, self.dof)*np.sqrt(np.diag(self.pcov))
     
     self.ci = np.zeros((len(self.p),2))
     self.ci[:,0] = plo
     self.ci[:,1] = pup
     return self.ci
def grubbs_test(data, alternative='two-sided', alpha=0.05):
    """Not found in either scipy.stats or statsmodels
    Used to determine whether a single outlier exists in the dataset, based on its dispersion from the mean.
    Note that this assumes that the data is normally distributed.

    Parameters
    ----------
    data: list or numpy array, 1-D
        The sample dataset we are evaluating for outliers
    alternative: str, {'two-sided', 'greater', 'less'}
        Whether we are evaluating only minimum values, maximum values or both
    alpha: float, default is 0.05
        Our alpha level for determining significant difference

    Returns
    -------
    If there is an outlier, returns the outlier. Else, returns None
    """
    if not isinstance(alternative, str):
        raise TypeError("Alternative Hypothesis is not of string type")
    if alternative.casefold() not in ['two-sided', 'greater', 'less']:
        raise ValueError("Cannot determine method for alternative hypothesis")
    if not isinstance(alpha, float):
        raise TypeError("Cannot discern alpha level for Grubb's test")
    if alpha > 1 or alpha < 0:
        raise ValueError("Alpha level must be within 0 and 1")
    data = _check_table(data, only_count=False)
    y_bar, s, n = np.mean(data), np.std(data, ddof=1), len(data)
    if alternative.casefold() == 'less':
        return_val = np.min(data)
        val = y_bar - return_val
        # one-sided test uses the alpha / n upper critical point
        t_value = t.isf(alpha / n, n - 2)
    elif alternative.casefold() == 'greater':
        return_val = np.max(data)
        val = return_val - y_bar
        t_value = t.isf(alpha / n, n - 2)
    else:
        val = np.max([y_bar - np.min(data), np.max(data) - y_bar])
        if val == y_bar - np.min(data):
            return_val = np.min(data)
        else:
            return_val = np.max(data)
        # two-sided test uses the alpha / (2 * n) upper critical point
        t_value = t.isf(alpha / (2 * n), n - 2)
    g = val / s
    rejection_stat = ((n - 1) / sqrt(n)) * sqrt(pow(t_value, 2) / (n - 2 + pow(t_value, 2)))
    if g > rejection_stat:
        return return_val
    else:
        return None
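# Added usage sketch (not part of the original snippet): a small sample with one
# suspiciously large value; assumes numpy as np, math.sqrt/pow, scipy.stats.t and
# the _check_table helper used above.
obs = [199.31, 199.53, 200.19, 200.82, 201.92, 201.95, 202.18, 245.57]
print(grubbs_test(obs, alternative='two-sided', alpha=0.05))  # expected: 245.57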
Example #13
def kramers_v(x, y, bias_correction=True):
    """Calculates Cramer's V statistic for categorical-categorical association.

    Taken from https://github.com/shakedzy/dython/blob/master/dython/nominal.py
    Inspired by Shaked Zychlinski.

    This is a symmetric coefficient: V(x,y) = V(y,x)
    Original function taken from: https://stackoverflow.com/a/46498792/5863503
    Wikipedia: https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V

    Parameters:
    -----------
    x : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    y : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    bias_correction : Boolean, default = True
        Use bias correction from Bergsma and Wicher,
        Journal of the Korean Statistical Society 42 (2013): 323-328.

    Returns:
    --------
    float in the range of [0,1]
    """
    confusion_matrix = crosstab(x, y)
    c2 = chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = c2 / n
    r, k = confusion_matrix.shape
    if bias_correction:
        phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
        rcorr = r - ((r - 1) ** 2) / (n - 1)
        kcorr = k - ((k - 1) ** 2) / (n - 1)
        if min((kcorr - 1), (rcorr - 1)) == 0:
            warnings.warn(
                "Unable to calculate Cramer's V using bias correction. Consider using bias_correction=False",
                RuntimeWarning)
            return np.nan
        else:

            V = np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
            # calculate p-value using V
            tval = t.isf(0.975, n-3)
            return V, t.sf(abs(tval), n-2)
    else:
        V = np.sqrt(phi2 / min(k - 1, r - 1))
        tval = t.isf(0.975, n-3)
        return V, t.sf(abs(tval), n-2)
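# Added usage sketch with made-up categorical data (not part of the original
# snippet; assumes pandas.crosstab is imported as `crosstab`, plus numpy,
# warnings and scipy.stats.chi2_contingency / t as used above).
colors = ['red', 'red', 'blue', 'blue', 'green', 'red', 'blue', 'green']
sizes = ['S', 'S', 'L', 'L', 'M', 'S', 'L', 'M']
print(kramers_v(colors, sizes, bias_correction=False))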
Example #14
File: fitMod.py Project: svey/pyNMR
def getError(var_matrix, dof):
    # 95% confidence half-widths from a fit covariance matrix.
    # t.isf(0.975, dof) is the (negative) lower 2.5% point of the t distribution;
    # the np.abs() below turns it into the usual two-sided critical value.
    a = 1 - 0.05 / 2
    factorSE = t.isf(a, dof)
    variance = np.diagonal(var_matrix)
    SE = np.sqrt(variance)
    error = np.abs(SE * factorSE)
    return error
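# Added usage sketch (not part of the original snippet): a hypothetical 2x2 fit
# covariance matrix with 10 degrees of freedom; assumes numpy as np and
# scipy.stats.t as above.
pcov = np.array([[0.04, 0.0], [0.0, 0.25]])
print(getError(pcov, dof=10))  # roughly [0.45, 1.11]: standard errors 0.2 and 0.5 times |t| ~ 2.23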
Example #15
def grubbs(timeseries, debug, debug_path):
    """
    A timeseries is anomalous if the Z score is greater than the Grubbs score.
    """

    try:
        series = scipy.array([x[1] for x in timeseries])
        stdDev = scipy.std(series)
        if stdDev == 0:
            return False

        mean = np.mean(series)
        tail_average = tail_avg(timeseries, debug, debug_path)
        z_score = (tail_average - mean) / stdDev
        len_series = len(series)
        threshold = scipy_stats_t.isf(.05 / (2 * len_series), len_series - 2)
        threshold_squared = threshold * threshold
        grubbs_score = ((len_series - 1) / np.sqrt(len_series)) * np.sqrt(threshold_squared / (len_series - 2 + threshold_squared))

        return z_score > grubbs_score
    except:
        if debug:
            trace = traceback.format_exc()
            errorline = 'error in grubbs - %s\n' % str(trace)
            with open(debug_path + '/nab.earthgecko_skyline.algorithm.errors.txt', 'a') as errorfile:
                errorfile.write(errorline)
        return None
Example #16
    def solve(self):
        filename_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
        filename_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
        reader_old = csv.reader(urllib.urlopen(filename_old))
        reader_total = csv.reader(urllib.urlopen(filename_total))
        count_line_old = 3
        old_num = []
        for row in reader_old:
            if count_line_old > 0:
                count_line_old -= 1
                continue
            old_num.append(int(row[1]))
        count_line_total = 5
        total_num = []
        for row in reader_total:
            if count_line_total > 0:
                count_line_total -= 1
                continue
            total_num.append(int(row[4]))
        old_rate = []
        for i in range(len(old_num)):
            old_rate.append(100.0 * old_num[i-1] / total_num[i-1])

        a = pd.Series(old_rate)
        x = a.mean()
        std = a.std()
        var = a.var() # var = s^2
        z = t.isf(0.05, 31)
        mean_lower = x - std / math.sqrt(31) * z
        mean_upper = x + std / math.sqrt(31) * z
        std_lower = 31 * var / chi2.isf(0.05, 31)
        std_upper = 31 * var / chi2.isf(0.95, 31)
        result = [[mean_lower, mean_upper], [std_lower, std_upper]]
        print result
        return result
Example #17
 def parameters_significance_test(self):
     nk1 = (self.n - (self.k + 1))
     s2 = np.dot(self.residuals.T, self.residuals) / nk1
     da = self.gram_schmidt * s2
     self.log(da)
     t_alpha_nk1 = t.isf(df=nk1, q=self.alpha)
     result = True
     self.ltw('\\subsubsection{Significance of the explanatory variables}\n')
     mx = 0
     for i in range(self.k + 1):
         self.log(f"Testowanie istotnosci zmiennej {i}")
         t_stat = self.params[i] / da[i, i]
         self.log(t_stat, t_alpha_nk1)
         self.ltw(f'\\[t_{{\\alpha_{{{i + 1}}}}} = {t_stat}\\]\n')
         self.ltw(f'\\[t_{{{self.alpha}, {nk1}}} = {t_alpha_nk1}\\]\n')
         if abs(t_stat) > mx and i:
             mx = abs(t_stat)
             self.least_important_param_name = self.var_names[i-1]
         if abs(t_stat) < t_alpha_nk1:
             result = False
             self.ltw(f'Variable ~$X_{{{i + 1}}}$ is statistically insignificant.\n')
             self.log(f"Variable {i} is insignificant")
         else:
             self.ltw(f'Variable ~$X_{{{i + 1}}}$ is statistically significant.\n')
     return result
Example #18
 def calcStudent(x1, x2):
     n1 = len(x1)
     n2 = len(x2)
     M1 = sum(x1) / n1
     M2 = sum(x2) / n2
     S1 = sum((x-M1)**2 for x in x1)/n1
     S2 = sum((x-M2)**2 for x in x2)/n2
     # Satterthwaite approximation
     for alpha in [0.2, 0.1, 0.05,0.01,0.001,0.0001]:
         alpha /= 2
         v1 = S1/n1
         v2 = S2/n2
         ta = (v1 * t.isf(alpha,n1-1) + v2 * t.isf(alpha,n2-1))/(v1+v2)
         print(ta)
         #print t.isf(alpha,n1+n2-2)
     return (M1 - M2) / (S1/(n1-1) + S2/(n2-1))**0.5
Example #19
 def solve(self):
     file_2010 = "http://py.mooctest.net:8081/dataset/temperature/temperature_2010.csv"
     file_2014 = "http://py.mooctest.net:8081/dataset/temperature/temperature_2014.csv"
     reader_2010 = csv.reader(urllib.urlopen(file_2010))
     reader_2014 = csv.reader(urllib.urlopen(file_2014))
     temperature_2010 = []
     i = 0
     for row in reader_2010:
         i += 1
         if i <= 6 or i >= 38:
             continue
         temperature_2010.append(float(row[8]))
     temperature_2014 = []
     i = 0
     for row in reader_2014:
         i += 1
         if i <= 5 or i >= 37:
             continue
         temperature_2014.append(float(row[8]))
     diff = []
     for i in range(len(temperature_2010)):
         diff.append(temperature_2014[i] - temperature_2010[i])
     d = np.mean(diff)
     sd = np.std(diff)
     result = d / (sd / np.sqrt(31))
     t_alpha = t.isf(0.05, 30)
     if result >= t_alpha:
         return "YES"
     else:
         return "NO"
Example #20
 def confidence_interval_b1(self, *, alpha):
     tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
     print(tmp_t_value)
     return [
         self.__b1 - tmp_t_value * math.sqrt(self.__estimator_variance_b1),
         self.__b1 + tmp_t_value * math.sqrt(self.__estimator_variance_b1)
     ]
Example #21
def grubbs(samples_rtt):
    N = len(samples_rtt)
    G = (max(samples_rtt) - mean(samples_rtt)) / std(samples_rtt)
    a = 0.01
    crit_val = t.isf(a / N, N - 2)
    # Grubbs critical value: ((N - 1) / sqrt(N)) * sqrt(t^2 / (N - 2 + t^2))
    crit_reg = ((N - 1) / sqrt(N)) * sqrt(crit_val ** 2 / (N - 2 + crit_val ** 2))
    return G, a, crit_reg
Example #22
def pth2Cth(pth, N, dz):
    """Convert threshold on partial correlation to equivalent threshold on its p-value"""
    #dz = 1  # dimension of conditioning variable
    df = max(N - dz - 2, 0)  # degrees of freedom
    y = -t.isf(1.0 - pth / 2.0, df, loc=0, scale=1) / math.sqrt(df)
    Cth = abs(y / math.sqrt(1.0 + y**2))
    return Cth
Example #23
 def solve(self):
     n = 51
     std = 4.9
     mean = 1.1
     t_value = t.isf(0.025, n - 1)
     stat_value = mean / (std / math.sqrt(n))
     return [round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value]
 def confidence_interval_of_mu_x(self, x0, alpha):
     y0 = self.a + self.b * x0
     t_value = t.isf(alpha / 2, self.n - 2)
     sigma = math.sqrt(self.sigma_sqr_of_epsilon)
     others = math.sqrt((1 / self.n) + (x0 - self.mean_x) ** 2 / self.S_xx)
     self.upper_bound = y0 + t_value * sigma * others
     self.lower_bound = y0 - t_value * sigma * others
def extreme_studentized_deviate_test(data, num_outliers=1, alpha=0.05):
    """Not found in either scipy.stats or statsmodels
    Used when we suspect there are at most k outliers, since other tests such as Grubbs or Tietjen-Moore rely on there
    being exactly k outliers. Note that this assumes the data is normally distributed.

    Parameters
    ----------
    data: list or numpy array, 1-D
        The data we are analyzing for outliers
    num_outliers: int, default is 1
        The maximum number of outliers we are checking for
    alpha: float, default is 0.05
        The level of significance for determining outliers

    Returns
    -------
    max_outliers: int
        The maximum number of outliers that our test found to exist
    outliers: list
        The outliers, up to max_outliers
    """
    data = _check_table(data, only_count=False)
    if not isinstance(num_outliers, int):
        raise TypeError("Number of outliers must be an integer")
    if num_outliers < 0:
        raise ValueError("Cannot test for negative amount of outliers")
    r = np.zeros(num_outliers)
    if alpha >= 1 or alpha <= 0:
        raise ValueError("Alpha level must be within 0 and 1")
    outliers = []
    data_copy = np.copy(data)
    n = len(data)
    if num_outliers > n:
        raise ValueError("Cannot have number of outliers greater than number of observations")
    for i in range(1, num_outliers+1):
        y_bar = np.mean(data_copy)
        s = np.std(data_copy, ddof=1)
        abs_resids = np.abs(data_copy - y_bar)
        r_i = np.max(abs_resids) / s
        p = 1 - (alpha / (2 * (n - i + 1)))
        lambda_i = ((n - i) * t.isf(p, n - i - 1)) / sqrt((n - i - 1 + pow(t.isf(p, n - i - 1), 2)) * (n - i + 1))
        r[i-1] = r_i > abs(lambda_i)
        outliers.append(data_copy[np.argsort(abs_resids)][-1:][0])
        data_copy = data_copy[np.argsort(abs_resids)][:-1]
    if np.any(r):
        max_outliers = np.max(np.where(r == 1)[0]) + 1
    else:
        max_outliers = 0
    return max_outliers, outliers[:max_outliers]
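# Added usage sketch (not part of the original snippet): synthetic data with two
# planted outliers; assumes numpy as np, math.sqrt/pow, scipy.stats.t and the
# _check_table helper used above.
vals = np.concatenate([np.random.default_rng(3).normal(0, 1, 40), [8.0, -7.5]])
k, outs = extreme_studentized_deviate_test(vals, num_outliers=4, alpha=0.05)
print(k, outs)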
Example #27
 def solve(self):
     n = 20
     std = 2.2
     mean = 4.6
     standard = 5
     stat_value =(mean - standard) / (std / math.sqrt(n))
     t_value = t.isf(0.025, n - 1)
     return [round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value]
Example #29
 def get_ci(self, alpha=0.05):
     '''
     Returns the confidence interval of the estimated parameters.
     You should call 'fit' before calling this method.
     @keyword alpha: Significance level (default: 0.05, which gives a 95% confidence interval)
     '''
     self._alpha = alpha
     plo = self._p - t.isf(alpha/2., self._dof)*self._delta
     pup = self._p + t.isf(alpha/2., self._dof)*self._delta
     if self._modelPhysParamsDict[self.model]:
         self._phys_plo = plo
         self._phys_pup = pup
     
     self._ci = np.zeros((len(self._p),2))
     self._ci[:,0] = plo
     self._ci[:,1] = pup
     return self._ci
 def genThreshold(self, strThresholdMode, tpFrameShape=None):
     lsStrThresholdModeColumn = strThresholdMode.split('-')
     sigmaMatch = re.match(r'as(\d*\.?\d*)sigma',
                           lsStrThresholdModeColumn[2])
     fpeMatch = re.match(r'(\d*\.?\d*)fpe', lsStrThresholdModeColumn[2])
     if tpFrameShape is None:
         if self.arrStdBGFrame is not None:
             tpFrameShape = self.arrStdBGFrame.shape
         elif self.arrKurtosisBGFrame is not None:
             tpFrameShape = self.arrKurtosisBGFrame.shape
     if sigmaMatch is not None:
         prob = norm.sf(float(sigmaMatch.group(1)))
     elif fpeMatch is not None:
         prob = 1 / (np.prod(tpFrameShape) * float(fpeMatch.group(1)))
     if lsStrThresholdModeColumn[0] == 'frame':
         if lsStrThresholdModeColumn[1] == 'norm':
             return norm.isf(prob) * self.dicPHStats['std']
         elif lsStrThresholdModeColumn[1] == 't':
             validKurtosis = max(self.dicPHStats['kurtosis'], 0)
             if validKurtosis == 0:
                 return norm.isf(prob) * self.dicPHStats['std']
             else:
                 nu = 6 / validKurtosis + 4
                 scale = self.dicPHStats['std'] * sqrt((nu - 2) / nu)
                 return t.isf(prob, df=nu, scale=scale)
     if lsStrThresholdModeColumn[0] == 'pixel':
         if lsStrThresholdModeColumn[1] == 'norm':
             return norm.isf(prob) * self.arrStdBGFrame
         elif lsStrThresholdModeColumn[1] == 't':
             arrIsTargetKurtosisFrame = ~np.isnan(self.arrKurtosisBGFrame)
             arrIsTargetKurtosisFrame[arrIsTargetKurtosisFrame] *= (
                 self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] <= 0)
             arrRet = np.ones(tpFrameShape) * np.nan
             arrRet[arrIsTargetKurtosisFrame] = (
                 norm.isf(prob) *
                 self.arrStdBGFrame[arrIsTargetKurtosisFrame])
             arrIsTargetKurtosisFrame = ~np.isnan(self.arrKurtosisBGFrame)
             arrIsTargetKurtosisFrame[arrIsTargetKurtosisFrame] *= (
                 self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] > 0)
             arrValidNu = (
                 6 / self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] + 4)
             arrRet[arrIsTargetKurtosisFrame] = (
                 t.isf(prob, df=arrValidNu) *
                 self.arrStdBGFrame[arrIsTargetKurtosisFrame] * np.sqrt(
                     (arrValidNu - 2) / arrValidNu))
             return arrRet
Example #31
 def solve(self):
     n = 25
     mean = 7.73
     std = 0.77
     u0 = 8
     stat_value = (mean - u0) / (std / math.sqrt(n))
     t_value = t.isf(0.05, n - 1)
     return [round(n-1, 2), round(stat_value, 2), not math.fabs(stat_value) >= t_value]
Example #32
    def confidence_prediction_bands(self, x_fit, results, confidence_interval,
                                    pcov):
        """
        Computes confidence prediction bands.

        Parameters
        ----------
        x_fit : array_like
        results : dict-like
            Best-fit parameter values, keyed by parameter name
        confidence_interval : float
            e.g. 0.95 for 95% bands
        pcov : numpy.array

        Returns
        -------
        cp_band_0, cp_band_1 : array_like
        """
        param_names = []
        param_values = []
        param_deltas = []
        for pname in self.minimizer.params.keys():
            if self.minimizer.params[pname].vary:
                param_names.append(pname)
                param_values.append(results[pname])
                param_deltas.append(1e-5 * results[pname])

        x_m_0s = numpy.empty_like(x_fit)
        f_m_0s = numpy.empty_like(x_fit)
        for i, xx in enumerate(x_fit):
            x_m_0s[i] = x_fit[i]
            f_m_0s[i] = self.equation(results, xx)

        diag_delta = numpy.diag(param_deltas)
        dxdbeta = numpy.empty([len(param_values), len(x_fit)])

        for i, value in enumerate(param_values):

            adj_param_values = param_values + diag_delta[i]

            for j, pname in enumerate(param_names):
                results[pname] = adj_param_values[j]

            for j, x_m_0 in enumerate(x_m_0s):
                dxdbeta[i][j] = (self.equation(results, x_m_0) -
                                 f_m_0s[j]) / diag_delta[i][i]

        variance = numpy.empty(len(x_fit))
        for i, gprime in enumerate(dxdbeta.T):
            variance[i] = gprime.T.dot(pcov).dot(gprime)

        critical_value = t.isf(0.5 * (confidence_interval + 1.0),
                               len(param_names))

        confidence_half_widths = critical_value * numpy.sqrt(variance)

        cp_band_0 = f_m_0s - confidence_half_widths
        cp_band_1 = f_m_0s + confidence_half_widths

        return cp_band_0, cp_band_1
 def test_b_is_zero(self, alpha):
     if self.sigma_sqr_of_epsilon is None:
        self.var_of_epsilon()
     self.H0_test_value = abs(self.b * math.sqrt(self.S_xx) / math.sqrt(self.sigma_sqr_of_epsilon))
     self.H0_t_value = t.isf(alpha / 2, self.n - 2)
     if self.H0_test_value >= self.H0_t_value:
        self.H0_valid = False
     else:
        self.H0_valid = True
 def prediction_interval_of_Y(self, x0, alpha):
     y0 = self.a + self.b * x0
     t_value = t.isf(alpha / 2, self.n - 2)
     if self.sigma_sqr_of_epsilon is None:
        self.var_of_epsilon()
     sigma = math.sqrt(self.sigma_sqr_of_epsilon)
     others = math.sqrt(1 + (1 / self.n) + (x0 - self.mean_x) ** 2 / self.S_xx)
     self.upper_bound = y0 + t_value * sigma * others
     self.lower_bound = y0 - t_value * sigma * others
Example #35
    def sample_size(self, alpha, beta, nlimit=10000):
        """
        :param alpha: risk alpha
        :param beta: risk beta
        :param nlimit: maximum sample size to try before giving up
        :return: an estimate of the required sample size
        """
        n = 3
        d = np.abs(self.m1 - self.m2)
        tbeta = t.isf(beta, n)
        talpha = t.isf(alpha, self.n1)
        while d*np.sqrt(n)/self.pooled_s < (tbeta+talpha):

            n += 1
            tbeta = t.isf(beta, n)
            if n > nlimit:
                break
        return n
Example #36
def compute_theta_sym(alpha_sym, number_of_walks_ran, length_of_walk):
    """
    Computes the threshold difference in the truncated hitting times of two nodes for the null hypothesis of the
    two nodes being path-symmetric to be violated at a significance level given by alpha_sym.

    return: theta_sym: used as a parameter for clustering based on truncated hitting time
    """

    return ((length_of_walk - 1) / (2 * number_of_walks_ran) ** 0.5) * t.isf(alpha_sym, df=number_of_walks_ran - 1)
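# Added numeric sketch with made-up walk parameters (not part of the original
# snippet; assumes scipy.stats.t as above): with 1000 walks of length 20 at
# alpha_sym = 0.01, theta_sym is (19 / sqrt(2000)) * t_{0.01, 999}, roughly 0.99.
print(compute_theta_sym(alpha_sym=0.01, number_of_walks_ran=1000, length_of_walk=20))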
Example #37
 def solve(self):
     lower = 18.985
     upper = 21.015
     mean = (lower + upper) / 2
     delta = mean - lower
     n = 36
     t_value = t.isf(0.025, n - 1)
     std = delta * math.sqrt(n) / t_value
     return [round(mean, 2), round(std, 2)]
Example #38
    def prefiction_interval_yh(self, *, xh, alpha):
        tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
        tmp_value = math.sqrt(self.__MSE * (1 + 1 / len(self.__x_data) + pow(
            (xh - self.__mean_x), 2) / self.__sxx))

        return [
            self.y_hat(x=xh) - tmp_t_value * tmp_value,
            self.y_hat(x=xh) + tmp_t_value * tmp_value
        ]
Example #39
	def solve(self):
		n1 = 22
		n2 = 22
		std2 = 45.1
		mean2 = 52.1
		std1 = 26.4
		mean1 = 27.1
		s_w_square = ((n1 - 1) * std1 * std1 + (n2 - 1) * std2 * std2) / (n1 + n2 - 2)
		stat_value = (mean1 - mean2) / (math.sqrt(s_w_square) * math.sqrt(1.0 / n1 + 1.0 / n2))
		t_value = t.isf(0.05, n1 + n2 - 2)
		return [min(n1 - 1, n2 - 1), -round(stat_value, 2), not stat_value <= -t_value]
Example #40
 def solve(self):
     n = 20
     std = 2.2
     mean = 4.6
     standard = 5
     stat_value = (mean - standard) / (std / math.sqrt(n))
     t_value = t.isf(0.025, n - 1)
     return [
         round(n - 1, 2),
         round(stat_value, 2), not stat_value <= -t_value
     ]
Example #41
 def solve(self):
     n = 25
     mean = 7.73
     std = 0.77
     u0 = 8
     stat_value = (mean - u0) / (std / math.sqrt(n))
     t_value = t.isf(0.05, n - 1)
     return [
         round(n - 1, 2),
         round(stat_value, 2), not math.fabs(stat_value) >= t_value
     ]
Example #42
 def confidence_interval_yh(self, *, xh, alpha):
     tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
     print('2a - t value : ', tmp_t_value)
     print("2a - Y hat : ", self.y_hat(x=xh))
     print('2a - S^2 : ', self.estimator_variance_yh(xh=xh))
     return [
         self.y_hat(x=xh) -
         tmp_t_value * math.sqrt(self.estimator_variance_yh(xh=xh)),
         self.y_hat(x=xh) +
         tmp_t_value * math.sqrt(self.estimator_variance_yh(xh=xh))
     ]
Example #43
def bwt_ave(x):

    x_median = np.median(x)
    x_mad = np.median(np.abs(x - np.median(x)))

    bwt_ave = 0.0
    while np.around(np.abs(bwt_ave - x_median), 8) > 0:
        bwt_ave, bwt_std = iter_bwt(x, x_median, x_mad)
        x_median = bwt_ave

    chi2_68_left = chi2.ppf(0.32 / 2.0, len(x) - 1)
    chi2_68_right = chi2.isf(0.32 / 2.0, len(x) - 1)
    t_68 = t.isf(0.32 / 2.0, int(0.7 * (len(x) - 1)))

    bwt_ave_low = bwt_ave + t_68 * bwt_std / np.sqrt(len(x))
    bwt_ave_up = bwt_ave - t_68 * bwt_std / np.sqrt(len(x))
    bwt_std_low = (np.sqrt((len(x) - 1) / chi2_68_left) - 1.0) * bwt_std
    bwt_std_up = (np.sqrt((len(x) - 1) / chi2_68_right) - 1.0) * bwt_std

    return (bwt_ave, bwt_ave_low, bwt_ave_up), (bwt_std, bwt_std_low, bwt_std_up)
Example #44
File: t.py Project: ronrest/pyrpy
def qt(q, df=1, loc=0, scale=1, ncp=None, lowertail=True, log=False):
    """
    The quantile function for the t distribution.
    You provide a quantile (eg q=0.75) or array of quantiles, and it returns the
    value along the t distribution that corresponds to the qth quantile.

    So using a value of q=0.30 means that 30% of the values are below the
    returned value. So it essentially gives us the cut off point for the lowest
    30% of values. If you want the cutoff point for the top 30% of values, then
    use lowertail=False.

    ARGS:
    ---------------
    :param q (float, array of floats):
        The quantile(s)
    :param df (float):
        degrees of freedom
    :param loc: array_like, optional
        location parameter (default=0)
    :param scale: float, optional
        scale (default=1)
    :param ncp (float):
        non-centrality parameter delta.
        Currently not implemented.
    :param lowertail (boolean):
        Lower tail?
    :param log: (boolean)
        use log?
        Currently not implemented
    RETURN:
    ---------------
    :return:        an array of the value(s) corresponding to the quantiles q
    """
    # ==========================================================================
    if log:
        raise NotImplementedError("Log option is not implemented yet.")
    elif lowertail:
        return t.ppf(q=q, df=df, loc=loc, scale=scale)
    else:
        return t.isf(q=q, df=df, loc=loc, scale=scale)
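# Added usage sketch mirroring R's qt() (not part of the original snippet; assumes
# scipy.stats.t as above): lower- and upper-tail quantiles for 10 degrees of freedom.
print(qt(0.975, df=10))                   # ~2.228
print(qt(0.975, df=10, lowertail=False))  # ~-2.228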
Example #45
def background_subtract_line(profile, profile_sd, background_mask):
    """
    Performs a linear background subtraction on a 1D peak profile

    Parameters
    ----------
    profile : np.ndarray
        1D profile
    profile_sd : np.ndarray
        standard deviations for profile
    background_mask : array_like
        array of bool that specifies which Y pixels to use for background
        subtraction.
    """

    # which values to use as a background region
    mask = np.array(background_mask).astype("bool")
    x_vals = np.where(mask)[0]

    try:
        y_vals = profile[x_vals]
    except IndexError:
        print(x_vals)

    y_sdvals = profile_sd[x_vals]
    x_vals = x_vals.astype("float")

    # some SD values may have 0 SD, which will screw up curvefitting.
    y_sdvals = np.where(y_sdvals == 0, 1, y_sdvals)

    # equation for a straight line
    def f(x, a, b):
        return a + b * x

    # estimate the linear fit
    y_bar = np.mean(y_vals)
    x_bar = np.mean(x_vals)
    bhat = np.sum((x_vals - x_bar) * (y_vals - y_bar))
    bhat /= np.sum((x_vals - x_bar) ** 2)
    ahat = y_bar - bhat * x_bar

    # get the weighted fit values
    # we know the absolute sigma values
    popt, pcov = curve_fit(f, x_vals, y_vals, sigma=y_sdvals, p0=np.array([ahat, bhat]), absolute_sigma=True)

    def CI(xx, pcovmat):
        return pcovmat[0, 0] + pcovmat[1, 0] * xx + pcovmat[0, 1] * xx + pcovmat[1, 1] * (xx ** 2)

    bkgd = f(np.arange(np.size(profile, 0)), popt[0], popt[1])

    # now work out confidence intervals
    # TODO, should this be confidence interval or prediction interval?
    # if you try to do a fit which has a singular matrix
    if np.isfinite(pcov).all():
        bkgd_sd = np.asarray([CI(x, pcov) for x in np.arange(len(profile))], dtype="float64")
    else:
        bkgd_sd = np.zeros_like(bkgd)

    bkgd_sd = np.sqrt(bkgd_sd)

    # get the t value for a two sided student t test at the 68.3 confidence
    # level
    bkgd_sd *= t.isf(0.1585, np.size(x_vals, 0) - 2)

    return EP.EPsub(profile, profile_sd, bkgd, bkgd_sd)
#

from scipy.stats import chi2, t, f
import numpy as np

# Q1
q1_1 = chi2.isf(q=0.95, df=4)
assert np.allclose(q1_1, 0.710723)
q1_2 = chi2.isf(q=0.05, df=4)
assert np.allclose(q1_2, 9.48773)
q1_3 = chi2.isf(q=0.95, df=9)
assert np.allclose(q1_3, 3.32511)
q1_4 = chi2.isf(q=0.05, df=9)
assert np.allclose(q1_4, 16.9190)

# Q2
q2_1 = t.isf(q=0.05, df=7)
assert np.allclose(q2_1, 1.895, rtol=1.e-3)
q2_2 = t.isf(q=0.025, df=7)
assert np.allclose(q2_2, 2.365, rtol=1.e-3)
q2_3 = t.isf(q=0.05, df=12)
assert np.allclose(q2_3, 1.782, rtol=1.e-3)
q2_4 = t.isf(q=0.025, df=12)
assert np.allclose(q2_4, 2.179, rtol=1.e-3)

# Q3
q3_1 = f.isf(q=0.05, dfn=5, dfd=7)
assert np.allclose(q3_1, 3.9715)
q3_2 = f.isf(q=0.95, dfn=5, dfd=7)
assert np.allclose(q3_2, 0.2050903422957813)  # inverse of F(7,5; 0.05)
Example #47
def critical_t(percentile, df, one_tailed):
    return round(t.isf((100-percentile)/100., df), 3) if one_tailed else round(t.isf((100-percentile)/200., df), 3)
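# Added usage sketch (not part of the original snippet; assumes scipy.stats.t as
# above): the 95th-percentile critical t values for df = 12.
print(critical_t(95, 12, one_tailed=True))   # ~1.782
print(critical_t(95, 12, one_tailed=False))  # ~2.179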
Example #48
 def _sim_EOS(self, a, da, b, db, c, dc, d, dd, *args):
     '''
     Simulates the EOS models to obtain average values and standard deviations of physical parameters.
     Only 4- and 5-parameter EOS models are currently supported.
     @param a: Average value of fitting parameter a
     @param da: Standard deviation of fitting parameter a
     @param b: Average value of fitting parameter b
     @param db: Standard deviation of fitting parameter b
     @param c: Average value of fitting parameter c
     @param dc: Standard deviation of fitting parameter c
     @param d: Average value of fitting parameter d
     @param dd: Standard deviation of fitting parameter d
     @param args: Additional parameters and their standard deviations (higher-order models)
     '''
     import random
     random.seed()
     # Sample fitting parameters
     N = 10000 # Number of samples
     t_scores = t.isf(self._alpha/2., self._dof)*self._delta
     a_vals = np.asarray([random.gauss(a, da/t_scores[0]) for i in range(N)])
     b_vals = np.asarray([random.gauss(b, db/t_scores[1]) for i in range(N)])
     c_vals = np.asarray([random.gauss(c, dc/t_scores[2]) for i in range(N)])
     d_vals = np.asarray([random.gauss(d, dd/t_scores[3]) for i in range(N)])
     if (len(args) > 0):
         # Check if len(args) == 2 (5-parameter models)
         if (len(args) == 2):
             e_vals = np.asarray([random.gauss(args[0], args[1]/t_scores[4]) for i in range(N)])
         else:
             print "ERROR: Currently cannot simulate EOS models with more than 5 parameters"
             print "Returning zero values from simulation"
             return [0., 0., 0., 0., [0. for i in range(len(args))]]
     
     if (self.model == EOSmodel.BM4):
         import BM4_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = BM4_aux.BM4_V0(b_vals, c_vals, d_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = BM4_aux.BM4_E0(a_vals, b_vals, c_vals, d_vals, self._V0_vals)
         self._B0_vals = BM4_aux.BM4_B0(b_vals, c_vals, d_vals, self._V0_vals)
         self._B0p_vals = BM4_aux.BM4_B0p(b_vals, c_vals, d_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals)
     elif (self.model == EOSmodel.mBM4):
         import mBM4_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = mBM4_aux.mBM4_V0(b_vals, c_vals, d_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = mBM4_aux.mBM4_E0(a_vals, b_vals, c_vals, d_vals, self._V0_vals)
         self._B0_vals = mBM4_aux.mBM4_B0(b_vals, c_vals, d_vals, self._V0_vals)
         self._B0p_vals = mBM4_aux.mBM4_B0p(b_vals, c_vals, d_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals)
     elif (self.model == EOSmodel.LOG4):
         import LOG4_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = LOG4_aux.LOG4_V0(b_vals, c_vals, d_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = LOG4_aux.LOG4_E0(a_vals, b_vals, c_vals, d_vals, self._V0_vals)
         self._B0_vals = LOG4_aux.LOG4_B0(b_vals, c_vals, d_vals, self._V0_vals)
         self._B0p_vals = LOG4_aux.LOG4_B0p(b_vals, c_vals, d_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals)
     elif (self.model == EOSmodel.MO4):
         import MO4_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = MO4_aux.MO4_V0(b_vals, c_vals, d_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = MO4_aux.MO4_E0(a_vals, b_vals, c_vals, d_vals, self._V0_vals)
         self._B0_vals = MO4_aux.MO4_B0(b_vals, c_vals, d_vals, self._V0_vals)
         self._B0p_vals = MO4_aux.MO4_B0p(b_vals, c_vals, d_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals)
     elif (self.model == EOSmodel.BM5):
         import BM5_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = BM5_aux.BM5_V0(b_vals, c_vals, d_vals, e_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         e_vals = e_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = BM5_aux.BM5_E0(a_vals, b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0_vals = BM5_aux.BM5_B0(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0p_vals = BM5_aux.BM5_B0p(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0pp_vals = BM5_aux.BM5_B0pp(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals), np.mean(self._B0pp_vals)
     elif (self.model == EOSmodel.mBM5):
         import mBM5_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = mBM5_aux.mBM5_V0(b_vals, c_vals, d_vals, e_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         e_vals = e_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = mBM5_aux.mBM5_E0(a_vals, b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0_vals = mBM5_aux.mBM5_B0(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0p_vals = mBM5_aux.mBM5_B0p(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0pp_vals = mBM5_aux.mBM5_B0pp(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals), np.mean(self._B0pp_vals)
     elif (self.model == EOSmodel.LOG5):
         import LOG5_aux
         # Obtain distribution of V0 from exact expressions
         V0_vals_ind, self._V0_vals = LOG5_aux.LOG5_V0(b_vals, c_vals, d_vals, e_vals, self.V)
         
         # Some values of V0 are invalid, so use only the valid values
         a_vals = a_vals[V0_vals_ind]
         b_vals = b_vals[V0_vals_ind]
         c_vals = c_vals[V0_vals_ind]
         d_vals = d_vals[V0_vals_ind]
         e_vals = e_vals[V0_vals_ind]
         
         # Obtain distributions of E0, B0, and B0p
         self._E0_vals = LOG5_aux.LOG5_E0(a_vals, b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0_vals = LOG5_aux.LOG5_B0(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0p_vals = LOG5_aux.LOG5_B0p(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         self._B0pp_vals = LOG5_aux.LOG5_B0pp(b_vals, c_vals, d_vals, e_vals, self._V0_vals)
         return np.mean(self._V0_vals), np.mean(self._E0_vals), np.mean(self._B0_vals), np.mean(self._B0p_vals), np.mean(self._B0pp_vals)
def confidence_prediction_bands(model, x_array, confidence_interval, f, flag=None):
    """
    This function calculates the confidence and prediction bands of the function f(x)
    from a best-fit model with uncertainties in its parameters as calculated (for example) 
    by the function nonlinear_least_squares_fit().

    The values are calculated via the delta method, which estimates the variance of f 
    evaluated at x as var(f(x)) = df(x)/dB var(B) df(x)/dB
    where df(x)/dB is the vector of partial derivatives of f(x) with respect to B 


    Parameters
    ----------
    model : class instance
        As modified (for example) by the function nonlinear_least_squares_fit().
        Should contain the following functions:
            get_params, set_params, function, normal
        And attributes:
            delta_params, pcov, dof, noise_variance

    x_array : 2D numpy array
        coordinates at which to evaluate the bounds

    confidence_interval : float
        Probability level of finding the true model (confidence bound) or any new
        data point (prediction bound). For example, the 95% confidence bounds
        should be calculated using a confidence interval of 0.95.

    f : function
        This is the function defining the variable y=f(x) for which the 
        confidence and prediction bounds are desired

    flag : variable type
        This (optional) flag is passed to model.function to control how the 
        modified position of x is calculated. This value is then used by f(x)

    Output
    ------
    bounds : 2D numpy array
        An element of bounds[i][j] gives the lower and upper confidence (i=0, i=1) and
        prediction (i=2, i=3) bounds for the jth data point.
    """
    
    # Check array dimensions
    n_dimensions = len(model.data[0])
    if len(x_array[0]) != n_dimensions:
        raise Exception('Dimensions of each point must be the same as the total number of dimensions')

        
    param_values = model.get_params()
    x_m_0s = np.empty_like(x_array)
    f_m_0s = np.empty_like(x_array[:,0])
    for i, x in enumerate(x_array):
        x_m_0s[i] = model.function(x, flag)
        f_m_0s[i] = f(x)
            
    diag_delta = np.diag(model.delta_params)
    dxdbeta = np.empty([len(param_values), len(x_array)])

    for i, value in enumerate(param_values):
        model.set_params(param_values + diag_delta[i])

        for j, x_m_0 in enumerate(x_m_0s):
            x_m_1 = model.function(x_m_0, flag)
            dxdbeta[i][j] = (f(x_m_1) - f_m_0s[j])/diag_delta[i][i]

    model.set_params(param_values) # reset params
    
    variance = np.empty(len(x_array))
    for i, Gprime in enumerate(dxdbeta.T):
        variance[i] = Gprime.T.dot(model.pcov).dot(Gprime)

    critical_value = t.isf(0.5*(confidence_interval + 1.), model.dof)
        
    confidence_half_widths = critical_value*np.sqrt(variance)
    prediction_half_widths = critical_value*np.sqrt(variance + model.noise_variance)
        
    confidence_bound_0 = f_m_0s - confidence_half_widths
    confidence_bound_1 = f_m_0s + confidence_half_widths
    prediction_bound_0 = f_m_0s - prediction_half_widths
    prediction_bound_1 = f_m_0s + prediction_half_widths
    

    return np.array([confidence_bound_0, confidence_bound_1,
                     prediction_bound_0, prediction_bound_1])
Example #50
x4  = bootstrap_estimate(b, 40,    197)
x5  = bootstrap_estimate(b, 80,    197)
x6  = bootstrap_estimate(b, 160,   197)
x7  = bootstrap_estimate(b, 320,   197)
x8  = bootstrap_estimate(b, 640,   197)
x9  = bootstrap_estimate(b, 1280,  197)
x10 = bootstrap_estimate(b, 2560,  197)
x11 = bootstrap_estimate(b, 5120,  197)
x12 = bootstrap_estimate(b, 10240, 197)
x13 = bootstrap_estimate(b, 20480, 197)

profile_n = numpy.array([x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13])

# Raw result
from pylab import * 
errorbar(numpy.log10(profile_n[:,0]), numpy.sqrt(profile_n[:,1]), yerr=numpy.sqrt(profile_n[:,2]*t.isf(0.25,b-1) / numpy.sqrt(b)))
show()

# Rescaled to total error
from pylab import * 
errorbar(numpy.log10(profile_n[:,0]), numpy.sqrt(profile_n[:,1]*197), yerr=numpy.sqrt(profile_n[:,2]*197.0*197.0*t.isf(0.25,b-1) / numpy.sqrt(b)))
show()

from pylab import * 
plot(numpy.log10(profile_n[:,0]), numpy.sqrt(profile_n[:,1]*197))
show()


# Doing this craziness so I can abort if it takes too long
b=30
x1  = bootstrap_estimate(b, 5,     6)
def estima_amostra(amostra_piloto, alpha, margem_erro):
    # Estimate the sample size needed to hit a desired margin of error,
    # based on a pilot sample: n = ceil((t * s / E) ** 2)
    df = len(amostra_piloto) - 1
    std = np.std(amostra_piloto, dtype=np.float64)
    tval = t.isf(alpha / 2, df)
    return math.ceil((tval * std / margem_erro) ** 2)
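# Added usage sketch with a hypothetical pilot sample (not part of the original
# snippet; assumes numpy as np, math and scipy.stats.t as above): sample size for
# a margin of error of 0.5 at 95% confidence.
pilot = np.array([10.2, 9.8, 10.5, 10.1, 9.9, 10.3])
print(estima_amostra(pilot, alpha=0.05, margem_erro=0.5))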
Example #52
        y = alpha
    return (y)

while True: #this while statement is the main program
    print ('\nThis program can calculate the t score, the t critical value and p value for a tail.')
    print ('This program will also conduct a basic one sample hypothesis test about the population mean, population standard deviation unknown.')
    alpha = alphaf()
    tails = tailchoice()
    revalpha = tailrev(tails,alpha)
    sampsize = ssize()
    df = sampsize - 1
    xval = xvalf()
    mean = meanf()
    stdev = stdevf()
    
    critval = t.isf(revalpha, df)

    print ('\nThe critical value corresponding to an alpha level of ', alpha, ' in a single tail is ', critval, '.\n', sep='')

    if tails == '3':
        print ('There is another tail at ', critval*-1, '\n', sep='')
        
    tval = (xval - mean)/(stdev/sampsize**0.5)
    
    if tails == '1':
        pval = t.sf(tval, df)
    elif tails == '2':
        pval = 1.0000 - t.sf(tval, df)
    else:
        pval = t.sf(tval, df)
        pval2 = 1.0000 - t.sf(tval, df)