Code Example #1
def main():
    cwd = os.getcwd()
    data = pd.read_csv(os.path.join(cwd, args.infile), header=0)

    ax = plt.subplot(2, 2, 1)
    ax.set_title('XY Position')
    ax.scatter(data['Px_gt'], data['Py_gt'], color='orange', label='ground truth')
    ax.scatter(data['Px_pred'], data['Py_pred'], color='b', label='prediction')
    ax.legend()

    ax = plt.subplot(2, 2, 2)
    ax.set_title('XY Velocity')
    ax.scatter(data['Vx_gt'], data['Vy_gt'], color='orange', label='ground truth')
    ax.scatter(data['Vx_pred'], data['Vy_pred'], color='b', label='prediction')
    ax.legend()

    nis_radar = data.loc[data.SensorType == 'R']['NIS'].to_numpy()
    chi_95p_3df = np.full(nis_radar.shape, chi2.isf(df=3, q=0.05))

    nis_lidar = data.loc[data.SensorType == 'L']['NIS'].to_numpy()
    chi_95p_2df = np.full(nis_lidar.shape, chi2.isf(df=2, q=0.05))

    ax = plt.subplot(2, 2, 3)
    ax.set_title('Radar NIS')
    ax.plot(nis_radar, 'b')
    ax.plot(chi_95p_3df, 'orange')

    ax = plt.subplot(2, 2, 4)
    ax.set_title('Lidar NIS')
    ax.plot(nis_lidar, 'b')
    ax.plot(chi_95p_2df, 'orange')

    plt.tight_layout()
    plt.show()
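Note: the 95% NIS reference lines above come straight from the chi-square inverse survival function; a quick check (values rounded, shown only for illustration):

from scipy.stats import chi2
print(chi2.isf(q=0.05, df=3))  # ~7.815, radar NIS bound (3 degrees of freedom)
print(chi2.isf(q=0.05, df=2))  # ~5.991, lidar NIS bound (2 degrees of freedom)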
Code Example #2
File: main.py Project: Sanchous19/MatStat
def draw_d_interval_from_n(a):
    global n
    a = 1 - a
    xlist = np.arange(10, 200, 1)
    ylist = []
    for x in xlist:
        n = x
        define_list_y()
        s = math.sqrt(variance())
        chi2_val0 = chi2.isf((1 - (1 - a)) / 2, n - 1)
        chi2_val1 = chi2.isf((1 + (1 - a)) / 2, n - 1)
        ylist.append((n * (s ** 2) / chi2_val1) - (n * (s ** 2) / chi2_val0))
    plt.plot(xlist, ylist, label='Без матожидания')

    ylist = []
    for x in xlist:
        n = x
        define_list_y()
        s = math.sqrt(np.sum((list_y - m) ** 2) / (n - 1))
        chi2_val0 = chi2.isf((1 - (1 - a)) / 2, n)
        chi2_val1 = chi2.isf((1 + (1 - a)) / 2, n)
        ylist.append((n * (s ** 2) / chi2_val1) - (n * (s ** 2) / chi2_val0))
    plt.plot(xlist, ylist, label='С матожиданием')

    plt.title("Зависимость величины доверительного интервала\n от объёма выборки с доверительным значением"
              + str(1 - a))
    plt.legend(loc='upper left')
    plt.show()
Code Example #3
 def __init__(self, seed=42, num_inputs=11, kind='chisq', balance=0.5, noise='gauss', spread=0.1):
     """
     Initialize the instance
     :param seed: Int. The seed for the numpy random state.
     :param num_inputs: Int. Number of inputs in the input vector of each event.
     :param kind:  String. Type of event stream to generate.
     :param balance: Float between 0. and 1.0. Fraction of the 0 class in the event stream
     :param noise: String. One of 'gauss', 'uniform'
     :param spread: Float. Spread of the noise in terms of percentiles of the
     :return:
     """
     assert isinstance(seed, int)
     assert isinstance(num_inputs, int)
     assert kind in ('chisq', 'cauchy')
     assert noise in ('gauss', 'uniform')
     assert isinstance(balance, float)
     assert (balance >= 0.) and (balance <= 1.)
     self.seed = seed
     np.random.seed(seed)
     self.num_inputs = num_inputs
     self.kind = kind
     if kind == 'chisq':
         self.cutoff = chi2.isf(balance, df=num_inputs)
         self.spread = (chi2.isf(balance + spread/2., df=num_inputs) -
                        chi2.isf(balance - spread/2., df=num_inputs))
     elif kind == 'cauchy':
         assert (self.num_inputs % 2 == 0)  # only implemented for even number of inputs
         self.cauchy = cauchy(0., np.sqrt(self.num_inputs)/np.pi)
         self.cutoff = self.cauchy.isf(balance)
         self.spread = self.cauchy.isf(balance - spread/2.) - self.cauchy.isf(balance + spread/2.)
         print('spread', self.spread)
     self.noise = noise
Code Example #4
    def __init__(self, sigLocal, sig0, N0):
        # Convert significance to p-value
        pLocal = norm.sf(sigLocal)
        p0 = norm.sf(sig0)

        # Get the test statistic value corresponding to the p-value
        u = chi2.isf(pLocal * 2, 1)
        u0 = chi2.isf(p0 * 2, 1)

        # The main equations
        N = N0 * exp(-(u - u0) / 2.)
        pGlobal = N + chi2.sf(u, 1) / 2.

        # Further info
        sigGlobal = norm.isf(pGlobal)
        trialFactor = pGlobal / pLocal

        self.sigGlobal = sigGlobal
        self.sigLocal = sigLocal
        self.sig0 = sig0
        self.pGlobal = pGlobal
        self.pLocal = pLocal
        self.p0 = p0
        self.N0 = N0
        self.N = N
        self.u0 = u0
        self.u = u
        self.trialFactor = trialFactor
Code Example #5
  def __init__(self,sigLocal,sig0,N0):
    # Convert significance to p-value
    pLocal = norm.sf(sigLocal)
    p0 = norm.sf(sig0)
    
    # Get the test statistic value corresponding to the p-value
    u = chi2.isf(pLocal*2,1)
    u0 = chi2.isf(p0*2,1)
    
    # The main equations
    N = N0 * exp(-(u-u0)/2.)
    pGlobal = N + chi2.sf(u,1)/2.
    
    # Further info
    sigGlobal = norm.isf(pGlobal)
    trialFactor = pGlobal/pLocal

    self.sigGlobal = sigGlobal
    self.sigLocal = sigLocal
    self.sig0 = sig0
    self.pGlobal = pGlobal
    self.pLocal = pLocal
    self.p0 = p0
    self.N0 = N0
    self.N = N
    self.u0 = u0
    self.u = u
    self.trialFactor = trialFactor
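Code Examples #4 and #5 are the same look-elsewhere-effect calculation; the sketch below restates those equations as a standalone function (the function name and the sample inputs are illustrative, not taken from either project):

from math import exp
from scipy.stats import norm, chi2

def global_significance(sig_local, sig0, N0):
    """Convert a local significance to a global one, following the equations above."""
    p_local = norm.sf(sig_local)            # local p-value
    u = chi2.isf(p_local * 2, 1)            # test statistic at the local p-value
    u0 = chi2.isf(norm.sf(sig0) * 2, 1)     # test statistic at the reference level
    N = N0 * exp(-(u - u0) / 2.)            # expected number of upcrossings
    p_global = N + chi2.sf(u, 1) / 2.
    return norm.isf(p_global), p_global

# e.g. a 3-sigma local excess, with N0=5 upcrossings counted at sig0=1
# print(global_significance(3.0, 1.0, 5))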
Code Example #6
    def computeStats(self, time, data, old=True):
        if len(data) != len(time):
            raise ValueError("time and data have mismatched shape.")

        if len(data) < self.n_points * 2:
            raise ValueError("data must have " + str(self.n_points * 2) +
                             " points or more.")

        raw_time, raw_data = time[len(time) %
                                  self.n_points:], data[len(data) %
                                                        self.n_points:]

        raw_data = raw_data.reshape(
            (len(raw_data) // self.n_points, self.n_points)).swapaxes(0, 1)
        time = raw_time[self.n_points - 1::self.n_points]
        mean = numpy.mean(raw_data, axis=0)
        std = numpy.std(raw_data, axis=0, ddof=1)

        mean_err = std / numpy.sqrt(self.n_points)
        std_low = numpy.sqrt((self.n_points - 1) * std**2 /
                             chi2.isf(self.alpha / 2.0, self.n_points - 1))
        std_high = numpy.sqrt(
            (self.n_points - 1) * std**2 /
            chi2.isf(1 - self.alpha / 2.0, self.n_points - 1))

        return (time, mean, std, (mean_err, std_low, std_high), raw_data)
Code Example #7
    def solve(self):
        filename_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
        filename_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
        reader_old = csv.reader(urllib.urlopen(filename_old))
        reader_total = csv.reader(urllib.urlopen(filename_total))
        count_line_old = 3
        old_num = []
        for row in reader_old:
            if count_line_old > 0:
                count_line_old -= 1
                continue
            old_num.append(int(row[1]))
        count_line_total = 5
        total_num = []
        for row in reader_total:
            if count_line_total > 0:
                count_line_total -= 1
                continue
            total_num.append(int(row[4]))
        old_rate = []
        for i in range(len(old_num)):
            old_rate.append(100.0 * old_num[i - 1] / total_num[i - 1])

        a = pd.Series(old_rate)
        x = a.mean()
        std = a.std()
        var = a.var()  # var = s^2
        z = t.isf(0.05, 31)
        mean_lower = x - std / math.sqrt(31) * z
        mean_upper = x + std / math.sqrt(31) * z
        std_lower = 31 * var / chi2.isf(0.05, 31)
        std_upper = 31 * var / chi2.isf(0.95, 31)
        result = [[mean_lower, mean_upper], [std_lower, std_upper]]
        print result
        return result
Code Example #8
File: Varianza.py Project: alexisbedoya/simu
def operacion(NSA):
    lista = np.array(NSA)
    print("----------------------------varianza--------------------")
    suma = 0
    for i in range(len(NSA)):
        suma = suma + NSA[i]
    res = suma / len(NSA)

    print("resultado: ", suma, " promedio: ", res)
    var = lista.var()
    print("varianza: ", var)
    desv = math.sqrt(var)
    print("desviacion estandar: ", desv)

    gradoLibertad = len(NSA) - 1
    error = 0.025
    x1 = chi2.isf(df=gradoLibertad, q=error)
    print(chi2.isf(df=gradoLibertad, q=error))

    gradoLibertad = len(NSA) - 1
    error = 0.975
    x2 = chi2.isf(df=gradoLibertad, q=error)
    print(x2)

    ls2 = x2 / (12 * (len(NSA) - 1))

    li2 = x1 / (12 * (len(NSA) - 1))
    print("LI: ", li2)
    print("LS: ", ls2)
    if (li2 <= var and var <= ls2) or (ls2 <= var and var <= li2):
        print("esta dentro del rango")
Code Example #9
    def ICVariancia(self, lista_de_medias):
        # Chi-square confidence interval for the variance
        n = len(lista_de_medias)  # Number of samples
        media = np.sum(lista_de_medias) / n  # Sample mean

        # Use the helper function (chi2.isf) to compute the chi-square values for n = 3200
        qui2Alpha = chi2.isf(q=0.025, df=n - 1)
        qui2MenosAlpha = chi2.isf(q=0.975, df=n - 1)

        # Sample variance: SUM((value - sample mean)^2) / (n - 1) = S^2
        s_quadrado = np.sum([(float(element) - float(media))**2
                             for element in lista_de_medias]) / (n - 1.0)

        # Chi-square confidence interval
        inf = (n - 1) * s_quadrado / qui2MenosAlpha
        sup = (n - 1) * s_quadrado / qui2Alpha
        centro = inf + (sup - inf) / 2.0

        if centro / 10.0 < (
                sup - inf
        ):  # if the interval is wider than 10% of the central value (5% precision)
            self.ok = False  # then the required precision was not reached
        else:
            self.ok = True

        # return the lower bound, the upper bound, the central value, and whether it is within the interval
        return (inf, sup, centro, self.ok)
Code Example #10
def init_ab(day, answer, alph, beta):
    if day <= 0:
        for i in range(len(answer.index)):
            if alph.get(int(answer.iloc[i].loc['worker']), "None") == "None":
                alph.update({int(answer.iloc[i].loc['worker']): 1})
                beta.update({int(answer.iloc[i].loc['worker']): 1})
    else:
        mv = []
        for worker in alph.keys():
            w = int(worker)
            mv.append(alph[w] / (alph[w] + beta[w]))
        mv_mean = np.mean(mv)
        mv_var = np.var(mv) * len(mv) / (len(mv) - 1)

        lamda = mv_mean
        sigma = ((len(mv) - 1) * mv_var / chi2.isf(0.025,
                                                   len(mv) - 1) +
                 (len(mv) - 1) * mv_var / chi2.isf(0.975,
                                                   len(mv) - 1)) / 2

        new_alph = (1 - lamda) * lamda**2 / sigma - lamda
        new_beta = (1 - lamda)**2 * lamda / sigma - (1 - lamda)

        for i in range(len(answer.index)):
            if alph.get(int(answer.iloc[i].loc['worker']), "None") == "None":
                alph.update({int(answer.iloc[i].loc['worker']): new_alph})
                beta.update({int(answer.iloc[i].loc['worker']): new_beta})
Code Example #11
File: my2.py Project: Tiny-Liu/professional-class
    def solve(self):
        filename_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
        filename_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
        reader_old = csv.reader(urllib.urlopen(filename_old))
        reader_total = csv.reader(urllib.urlopen(filename_total))
        count_line_old = 3
        old_num = []
        for row in reader_old:
            if count_line_old > 0:
                count_line_old -= 1;
                continue;
            old_num.append(int(row[1]))
        count_line_total = 5
        total_num = []
        for row in reader_total:
            if count_line_total > 0:
                count_line_total -= 1
                continue
            total_num.append(int(row[4]))
        old_rate = []
        for i in range(len(old_num)):
            old_rate.append(100.0 * old_num[i-1] / total_num[i-1])

        a = pd.Series(old_rate)
        x = a.mean()
        std = a.std()
        var = a.var() # var = s^2
        z = t.isf(0.05, 31)
        mean_lower = x - std / math.sqrt(31) * z
        mean_upper = x + std / math.sqrt(31) * z
        std_lower = 31 * var / chi2.isf(0.05, 31)
        std_upper = 31 * var / chi2.isf(0.95, 31)
        result = [[mean_lower, mean_upper], [std_lower, std_upper]]
        print result
        return result
Code Example #12
def get_inter_d(md, md_teor, a_list, n_list, inter=False):
    inter_d = []
    inter_d_teor = []
    bol = callable(md)
    if not bol:
        m = md[0]
        ds2 = md[1]
        ds2_teor = md_teor[1]
    for a, n in zip(a_list, n_list):
        if bol:
            _, Y = md(n)
            m = tochn_m(Y)
            ds2 = tochn_d(Y, m)
            ds2_teor = tochn_d(Y, md_teor[0])
        left = chi2.isf((1 + (1 - a)) / 2, n - 1)
        right = chi2.isf((1 - (1 - a)) / 2, n - 1)
        if inter:
            inter_d.append(((n - 1) * ds2 / left, (n - 1) * ds2 / right))
            inter_d_teor.append(
                ((n - 1) * ds2_teor / left, (n - 1) * ds2_teor / right))
        else:
            inter_d.append((n - 1) * ds2 / left - (n - 1) * ds2 / right)
            inter_d_teor.append((n - 1) * ds2_teor / left -
                                (n - 1) * ds2_teor / right)
    return inter_d, inter_d_teor
Code Example #13
    def Init_ab(self):
        if len(self.alph) == 0:
            for worker in self.workers:
                if self.alph.get(worker, "None") == "None":
                    self.alph.update({worker: 1})
                    self.beta.update({worker: 1})
        else:
            mv = []
            for worker in self.alph.keys():
                w = worker
                mv.append(self.alph[w] / (self.alph[w] + self.beta[w]))
            mv_mean = np.mean(mv)
            mv_var = np.var(mv) * len(mv) / (len(mv) - 1)

            lamda = mv_mean
            sigma = ((len(mv) - 1) * mv_var / chi2.isf(0.025,
                                                       len(mv) - 1) +
                     (len(mv) - 1) * mv_var / chi2.isf(0.975,
                                                       len(mv) - 1)) / 2

            new_alph = (1 - lamda) * lamda**2 / sigma - lamda
            new_beta = (1 - lamda)**2 * lamda / sigma - (1 - lamda)

            for worker in self.workers:
                if self.alph.get(worker, "None") == "None":
                    self.alph.update({worker: new_alph})
                    self.beta.update({worker: new_beta})
Code Example #14
def ROC_CI(N, Vec_theta, alpha=0.05):
    """
    One-Dimensional Confidence-Interval Calculations
    Parameters
    ----------
    N : int
        Number of samples behind each estimated proportion
    Vec_theta : ndarray
        Estimated proportions (e.g. points along a ROC curve)
    alpha : float
        Significance level, default 0.05

    Returns
    -------
    theta_L : ndarray
        Lower confidence bounds
    theta_U : ndarray
        Upper confidence bounds
    """
    theta_L = np.zeros(Vec_theta.size)
    theta_U = np.zeros(Vec_theta.size)
    for i, theta in enumerate(Vec_theta):
        if theta != 0:
            alpha_2 = alpha / 2
        else:
            alpha_2 = alpha

        if N > 100 and theta > 0.1:
            d = N - 1
            sigma = sqrt(theta * (1 - theta))
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = theta - t.isf(alpha_2, df=d) * sigma / sqrt(N)
            theta_U[i] = theta + t.isf(alpha_2, df=d) * sigma / sqrt(N)
        elif N > 100 and theta < 0.1:
            if theta == 0:
                theta_L[i] = 0
            else:
                d_L = 2 * N * theta
                theta_L[i] = chi2.isf(1 - alpha_2, df=d_L) / (2 * N)
            d_U = 2 * (N * theta + 1)
            theta_U[i] = chi2.isf(alpha_2, df=d_U) / (2 * N)
        else:
            d1L = N - N * theta + 1
            d2L = N * theta
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = d2L / (d2L +
                                    d1L * f.isf(alpha_2, 2 * d1L, 2 * d2L))
            d1U = N * theta + 1
            d2U = N - N * theta
            theta_U[i] = d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U) / (
                d2U + d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U))

    # ensure increase
    for i in range(Vec_theta.size - 1):
        if theta_L[i + 1] < theta_L[i]:
            theta_L[i + 1] = theta_L[i]
        if theta_U[i + 1] < theta_U[i]:
            theta_U[i + 1] = theta_U[i]

    return theta_L, theta_U
Code Example #15
File: funcs.py Project: S1lentem/MMod
def get_interval_assessment(values, M=None, count=None):
    count = count if count is not None else len(values)
    M = M if M is not None else get_assessment_of_mathematical_expectation(
        values, count)

    d = sum([(value - M)**2 for value in values]) / (count - 1)
    c1 = count * d / chi2.isf((1 - 0.99) / 2, count - 1)
    c2 = count * d / chi2.isf((1 + 0.99) / 2, count - 1)

    return c1, c2
Code Example #16
def convertN(sig, sig0, N0):
    # Convert significance to p-value
    p = norm.sf(sig)
    p0 = norm.sf(sig0)
    # Get the test statistic value corresponding to the p-value
    u = chi2.isf(p * 2, 1)
    u0 = chi2.isf(p0 * 2, 1)
    # The main equation
    N = N0 * exp(-(u - u0) / 2.)
    return N
Code Example #17
def convertN(sig,sig0,N0):
  # Convert significance to p-value
  p = norm.sf(sig)
  p0 = norm.sf(sig0)
  # Get the test statistic value corresponding to the p-value
  u = chi2.isf(p*2,1)
  u0 = chi2.isf(p0*2,1)
  # The main equation
  N = N0 * exp(-(u-u0)/2.)
  return N
Code Example #18
File: stat_research.py Project: igorxxl8/mmod
def intervals_estimate_discrete(n, m, d, _m, _d, q):
    s = sqrt(_d)

    k = s * t.ppf(q, n - 1) / sqrt(n - 1)
    print('\nConfidence interval for ME (quantile - {}):'.format(q))
    print('{} <= {} < {}'.format(_m - k, m, _m + k))

    k1 = n * _d / chi2.isf((1 - q) / 2, n - 1)
    k2 = n * _d / chi2.isf((1 + q) / 2, n - 1)
    print('\nConfidence interval for Dispersion (quantile - {}):'.format(q))
    print('{} <= {} < {}'.format(k1, d, k2))
Code Example #19
File: calcTemporal.py Project: tiagoantao/mosquitoNe
def calcNe(F, genDiff, pop1Len, pop2Len):
    fk, df = F
    Nes = []
    for ft in [fk, df*fk/chi2.isf(0.025,df), df*fk/chi2.isf(0.975, df)]:
        Ne = (1.0*(genDiff))/ (2*(
             ft
               - 1.0/(2*pop1Len)
               - 1.0/(2*pop2Len)
             ))
        if Ne<0 or Ne>10000: Ne = float('inf')
        Nes.append(Ne)
    return Nes
Code Example #20
File: main.py Project: Sanchous19/MatStat
def variance_confidence_interval(a, is_known_expected_value):
    if is_known_expected_value:
        s = math.sqrt(np.sum((list_y - m) ** 2) / (n - 1))
        variance_str = 'с известным матожиданием'
        freedom_degree = n
    else:
        s = math.sqrt(variance())
        variance_str = ''
        freedom_degree = n - 1
    chi2_val0 = chi2.isf((1 - (1 - a)) / 2, freedom_degree)
    chi2_val1 = chi2.isf((1 + (1 - a)) / 2, freedom_degree)
    print("Доверительный интервал для дисперсии:", (n * (s ** 2) / chi2_val0), "<= D <=", (n * (s ** 2) / chi2_val1),
          "со значимостью", 1 - a, variance_str)
Code Example #21
def chi2_independence(alpha, data):
    g, p, dof, expctd = chi2_contingency(data)
    if dof == 0:
        print('自由度应该大于等于1')
    elif dof == 1:
        cv = chi2.isf(alpha * 0.5, dof)
    else:
        cv = chi2.isf(alpha * 0.5, dof-1)
    if g > cv:
        re = 1  # reject the null hypothesis
    else:
        re = 0  # accept (fail to reject) the null hypothesis
    return g, p, dof, re, expctd
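A minimal usage sketch for chi2_independence above, assuming an illustrative 2x2 contingency table (chi2_contingency is the standard scipy.stats call already used inside the function):

import numpy as np
g, p, dof, re, expctd = chi2_independence(0.05, np.array([[30, 10],
                                                          [20, 40]]))
print(g, p, dof, re)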
Code Example #22
File: calculadora.py Project: lfmnovaes/SimuladorAD
    def ICVariancia(self, media, variancia, n, v_analitico):
        # Chi-square confidence interval for the variance
        #s  = math.sqrt(variancia)
        s_quadrado = media
        sobreposicaoICs = False

        # Use the helper function (chi2.isf) to compute the chi-square values for n = 3200
        qui2Alpha = chi2.isf(q=0.025, df=n - 1)
        qui2MenosAlpha = chi2.isf(q=0.975, df=n - 1)
        #print(f'qui2alpha = {qui2Alpha}; qui2MenosAlpha = {qui2MenosAlpha}')

        # Chi-square confidence interval
        supChi = ((n - 1) * s_quadrado) / qui2MenosAlpha
        infChi = ((n - 1) * s_quadrado) / qui2Alpha
        centroChi = infChi + (supChi - infChi) / 2.0

        p_chi2 = (qui2Alpha - qui2MenosAlpha) / (qui2Alpha + qui2MenosAlpha)

        # Confidence interval for the variance via Student's t
        nQuad = math.sqrt(n)
        s = math.sqrt(variancia)
        tStudent = 1.960  # Student's t for n > 30 samples

        # Sample variance: SUM((value - sample mean)^2) = S^2
        #s = math.sqrt(np.sum([(float(element) - float(media))**2 for element in lista_de_medias])/(n-1.0))
        infT = media - (tStudent * (s / nQuad))  # lower CI bound via Student's t
        supT = media + (tStudent * (s / nQuad))  # upper CI bound via Student's t
        centroT = infT + (supT - infT) / 2.0  # center of the intervals

        # If the interval is wider than 10% of the central value (5% precision), the required precision was not reached
        p_tStudent = tStudent * (s / (media * nQuad))
        #print(f'precisao tStudent: {p_tStudent}')
        #print(f'IC Var T; sup: {supT}; inf: {infT}; centro: {centroT}')
        # Check whether the two computed CIs fully overlap
        if infT <= centroChi <= supT and infChi <= centroT <= supChi:
            sobreposicaoICs = True
        #print(f'precisao chi: {p_chi2}')
        if (p_chi2 <= self.precisaoIC and p_tStudent <= self.precisaoIC
                and infChi <= v_analitico <= supChi
                and infT <= v_analitico <= supT and sobreposicaoICs):
            self.ok = True
        else:
            self.ok = False

        #print((p_chi2 and p_tStudent <= self.precisaoIC), (infChi <= v_analitico <= supChi), (infT <= v_analitico <= supT), sobreposicaoICs )
        # return the lower bound, upper bound, central value, and whether the target was met
        return (infChi, supChi, centroChi, self.ok, p_chi2, centroT,
                p_tStudent, infT, supT, sobreposicaoICs)
Code Example #23
 def confidence_interval_for_variance(variance, num_rounds,
                                      confidence_interval):
     """Calculates interval of confidence for the variance by the Chi-square distribution
     Returns the center of the interval, the upper and lower bounds and its precision"""
     alpha_lower = 1 - confidence_interval
     alpha_lower /= 2
     alpha_upper = 1 - alpha_lower
     df = num_rounds - 1
     chi2_lower = chi2.isf(alpha_lower, df)
     chi2_upper = chi2.isf(alpha_upper, df)
     lower = df * variance / chi2_lower
     upper = df * variance / chi2_upper
     precision = (chi2_lower - chi2_upper) / (chi2_lower + chi2_upper)
     center = (upper + lower) / 2
     return center, lower, upper, precision
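A quick sanity check of the helper above, assuming it is available as a plain function and using illustrative inputs (a sample variance of 2.0 from 30 rounds at 95% confidence):

center, lower, upper, precision = confidence_interval_for_variance(
    variance=2.0, num_rounds=30, confidence_interval=0.95)
print(lower, upper)  # chi-square interval for the variance; 2.0 falls between the bounds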
Code Example #24
    def _get_binning_threshold(self, df: DataFrame, y: Series) -> Dict:
        """
        Compute the binning threshold for each variable
        :param df: data for all variables
        :param y: label data
        :return: dict of binning intervals per variable
        """
        params = {
            # chi-square threshold
            "threshold":
            chi2.isf(1 - self.confidence_level,
                     y.unique().size - 1),
            "max_leaf_nodes":
            self.max_leaf_nodes,
            "min_samples_leaf":
            max(int(np.ceil(y.size * self.min_samples_leaf)), 50)
        }

        for col in df.columns:
            feat_type = self.features_info.get(col)
            nan_value = self.features_nan_value.get(col)
            bins, flag = self._bin_threshold(df[col],
                                             y,
                                             is_num=feat_type,
                                             nan_value=nan_value,
                                             **params)
            self.features_bins[col] = {'bins': bins, 'flag': flag}
Code Example #25
File: outliers.py Project: sarahburnett/Piff
 def _get_thresh(self, dof):
     if self.thresh is not None:
         return self.thresh
     elif self.ndof is not None:
         return self.ndof * dof
     else:
         return chi2.isf(self.prob, dof)
Code Example #26
def compute_compatibility(observations, predictions):
    """
    Individual Compatibility Test
    """
    compatibility = {'d2': None, 'IC': None}
    compatibility['d2'] = np.zeros(shape=(observations['M'], predictions['N']))
    compatibility['IC'] = np.zeros(shape=(observations['M'], predictions['N']))

    # Compute Individual Squared Mahalanobis Distances
    for i in range(observations['M']):
        z = observations['z'][i]
        R = observations['R_covariance'][i]
        # R = [1]
        for j in range(predictions['N']):
            C = np.add(predictions['H_P_H'][i], R)
            C_inverse = np.linalg.inv(C)
            # C_inverse = [1]
            # print(z,R,C, predictions['h_map_fn'][j])
            compatibility['d2'][i][j] = mahalanobis(z,
                                                    predictions['h_map_fn'][j],
                                                    C_inverse)

    # Check Mahalanobis Distance against critical values from a Chi2 Distribution.
    for i in range(observations['M']):
        for j in range(predictions['N']):
            if (compatibility['d2'][i][j] < chi2.isf(q=0.01, df=2)):
                compatibility['IC'][i][j] = 1
            else:
                compatibility['IC'][i][j] = 0

    return compatibility
Code Example #27
def plot_LL_cpt(i=0):
    labels = [
        'Clearance$_{cpt,O/L}$', 'Clearance$_{cpt,B}$', 'Clearance$_{sn,O/L}$',
        'Bioactivation$_{cpt}$', 'transport Blood-Liver',
        'transport Blood-Organ'
    ]
    data = np.load('cost_pRange_k_best_cpt_param_' + str(i) + '.npz')
    cost = data['cost']
    pRange = data['pRange']
    k = data['k']
    best = data['best']
    Chi = chi2.isf(q=0.05, df=1)
    fig = plt.figure()
    plt.plot(pRange, cost, label='Likelihood profile')
    chi = Chi + best
    plt.plot(pRange, chi.repeat(len(pRange)), label='Confidence threshold')
    plt.plot(k, best, 'rx', label='Minimum')
    plt.ylim([best * 0.99, chi * 1.1])
    plt.title('CPT11 ' + labels[i])
    if i == 3:
        plt.xlabel('param value (mg/h)')
    else:
        plt.xlabel('param value (ml/h)')
    plt.ylabel('score')
    plt.legend()
    plt.show()
    with PdfPages('likelihood_profile_cpt_parameter_' + str(i) +
                  '.pdf') as pdf:
        pdf.savefig(fig, bbox_inches='tight')
Code Example #28
    def check_norm(X, names=False):
        n, p = X.shape
        M = []
        for i in range(X.shape[0]):
            M.append(list(X.mean(axis=0)))
        Sigma = X.cov()
        SigmaInv = inv(Sigma)
        diff = (X.values) - M
        GD = []
        for i in range(len(diff)):
            GD.append(diff[i].dot(SigmaInv).dot(diff[i]))

        chi = chi2.isf(q=0.5, df=p)
        print('ChiSquare table value is', chi, '\n')

        sum = 0
        for i in range(n):
            if GD[i] <= chi:
                sum = sum + 1

        print('Number of Observation below QQ line', sum, '\n')

        #Plotting
        obs = len(X)
        plt.scatter(range(obs), GD)
        plt.axhline(y=chi, color='r', linestyle='--')
        if (names == True):
            for i in range(obs):
                plt.annotate(i, (i, GD[i]))
            plt.show()
        print(
            '\n***Interpretation : Data is considered to be Normally distributed if Half of the Observations are below/above QQ line\n\tWhile More Observations below/above QQ line denotes deviation from Normality'
        )
Code Example #29
File: g9.py Project: ababino/efe
def ej13c(cv, data):
    dfs = []
    for N in cv.index[:-2]:
        T, chi2pval, df = chi2_from_sample(data[:N])
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [T],
                                 'Test': ['$\chi^2$'],
                                 'Estadistico': ['Medido']}))
        Tc = chi2.isf(0.01, df)
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [Tc],
                                 'Test': ['$\chi^2$'],
                                 'Estadistico': ['Critico']}))
        T = kstest(data[:N], norm(0, 2.5).cdf)[0]
        Tc = cv.loc[N, 'T_k^{critico}']
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [T],
                                 'Test': ['Kolmolgorov'],
                                 'Estadistico': ['Medido']}))
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [Tc],
                                 'Test': ['Kolmolgorov'],
                                 'Estadistico':  ['Critico']}))
        T = cramer_von_mises(data[:N])
        Tc = cv.loc[N, 'T_c^{critico}']
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [T],
                                 'Test': ['Cramer Von-Mises'],
                                 'Estadistico': ['Medido']}))
        dfs.append(pd.DataFrame({'Eventos': [N], 'T': [Tc],
                                 'Test': ['Cramer Von-Mises'],
                                 'Estadistico': ['Critico']}))
    df = pd.concat(dfs)
    g = sns.FacetGrid(df, col='Test', hue='Estadistico', sharey=False)
    g.map(plt.scatter, 'Eventos', 'T')
    g.map(plt.plot, 'Eventos', 'T')
    g.add_legend()
    g.set(xticks=[0, 4000, 8000])
    plt.savefig('fig3.jpg')
    plt.show()
Code Example #30
File: outliers.py Project: rmjarvis/Piff
 def _get_thresh(self, dof):
     if self.thresh is not None:
         return self.thresh
     elif self.ndof is not None:
         return self.ndof * dof
     else:
         return chi2.isf(self.prob, dof)
Code Example #31
File: lab3.py Project: annahdigital/tvims_labs
def Pearsons_chi_squared_test(M, variation, n):
    A_pos, B_pos, f_pos = interval_method(variation, M, n)
    graphics(M, f_pos, A_pos, B_pos)
    v_interval = number_of_values_on_interval(M, variation, A_pos, B_pos)
    p_interval = [v_interval[i] / n for i in range(M)]
    p_theor = [F(B_pos[i]) - F(A_pos[i]) for i in range(M)]
    confidence_interval = 0.01
    if abs(1 - sum(p_theor)) > confidence_interval:
        confidence_interval *= 2
    chi_sq = n * sum([(p_theor[i] - p_interval[i])**2 / p_theor[i]
                      for i in range(M)])
    print("Pearson's chi-squared test:")
    if chi_sq < chi2.isf(confidence_interval, M - 1):
        print("True: ", chi_sq, " < ", chi2.isf(confidence_interval, M - 1))
    else:
        print("False: ", chi_sq, " > ", chi2.isf(confidence_interval, M - 1))
Code Example #32
def jointly_compatible(predictions, observations, H):
    """
    Bool check for Joint Compatibility
    """
    d2 = joint_mahalanobis(predictions, observations, H)
    dof = 2 * len(H)
    return d2 < chi2.isf(q=0.01, df=dof)
Code Example #33
def information_gain(target_attribute, examples, attributes, classes, confidence = 0.05):
    # Find the entropy of the parent set by noting the frequency of each classification and then dividing by the size of the parent set
    class_counts = {c: 0 for c in classes}
    for example in examples: class_counts[example[-1]] += 1
    information_gain = entropy([class_counts[x]/len(examples) for x in class_counts])
    # Find the entropy of splitting the parent set by a certain attribute.
    # Entropy is calculated by summing over the entropies for each possible value of the attribute times the probability that it occurs in the parent set.
    attribute_entropy = 0
    total_deviation = 0
    # There are len(examples) - 1 degrees of freedom.
    chisquare_statistic = chi2.isf(confidence, len(examples) - 1)
    for a in attributes[target_attribute]:
        examples_subset = [e for e in examples if e[target_attribute] == a]
        if len(examples_subset) != 0:
            attribute_class_counts = {c: 0 for c in classes}
            for example in examples_subset: attribute_class_counts[example[-1]] += 1

            # Determine the deviation from expectation.
            observed = [attribute_class_counts[x] for x in attribute_class_counts]
            expected = [class_counts[x] * len(examples_subset) / len(examples) for x in attribute_class_counts]
            deviations = [(observed[i] - expected[i]) ** 2 / expected[i] for i in range(len(observed))]
            total_deviation += sum(deviations)

            attribute_entropy += entropy([attribute_class_counts[x]/len(examples_subset) for x in attribute_class_counts]) * len(examples_subset)/len(examples)
    if total_deviation > chisquare_statistic: return 0
    information_gain -= attribute_entropy
    return information_gain
Code Example #34
def plot_LL_lohp(i=0):
    labels = [
        'Clearance$_{O/L}$', 'Clearance$_B$', 'Efflux/Uptake$_L$',
        'Efflux/Uptake$_O$', 'Bind', 'Unbind'
    ]
    data = np.load('cost_pRange_k_best_lohp_param_' + str(i) + '.npz')
    cost = data['cost']
    pRange = data['pRange']
    k = data['k']
    best = data['best']
    Chi = chi2.isf(q=0.05, df=1)
    fig = plt.figure()
    plt.plot(pRange, cost, label='Likelihood profile')
    chi = Chi + best
    plt.plot(pRange, chi.repeat(len(pRange)), label='Confidence threshold')
    plt.plot(k, best, 'rx', label='Minimum')
    plt.ylim([best * 0.99, chi * 1.1])
    plt.title('LOHP ' + labels[i])
    plt.xlabel('param value (ml/h)')
    plt.ylabel('score')
    plt.legend()
    plt.show()
    with PdfPages('likelihood_profile_lohp_parameter_' + str(i) +
                  '.pdf') as pdf:
        pdf.savefig(fig, bbox_inches='tight')
Code Example #35
File: main.py Project: kail/csep546
    def _should_stop_splitting(data, attribute, attribute_vals, target_attr,
                               target_val, confidence):
        positive_count = len(
            [row for row in data if row[target_attr] == target_val])
        negative_count = len(data) - positive_count

        num_attrs = 0
        total = 0
        for val in attribute_vals:
            num_attrs += 1
            rows_with_value = [row for row in data if row[attribute] == val]
            local_positive_count = len([
                row for row in rows_with_value
                if row[target_attr] == target_val
            ])
            local_negative_count = len(rows_with_value) - local_positive_count
            expected_positive = len(rows_with_value) * positive_count / len(
                data)
            expected_negative = len(rows_with_value) * negative_count / len(
                data)

            positive_part = ((local_positive_count - expected_positive)**
                             2) / expected_positive if expected_positive else 0
            negative_part = ((local_negative_count - expected_negative)**
                             2) / expected_negative if expected_negative else 0
            total = total + positive_part + negative_part

        # Do the chi2 test now
        # TODO: Verify that these numbers actually work
        chi_val = chi2.isf(1 - confidence, num_attrs - 1)

        # Total > chi_val means keep splitting
        return total < chi_val
Code Example #36
def JCBB(z, zbar, S, alpha1, alpha2):
    assert len(z.shape) == 1, "z must be in one row in JCBB"
    assert z.shape[0] % 2 == 0, "z must be equal in x and y"
    m = z.shape[0] // 2

    a = np.full(m, -1, dtype=int)
    abest = np.full(m, -1, dtype=int)

    # ic has measurements rowwise and predicted measurements columnwise
    ic = individualCompatibility(z, zbar, S)
    g2 = chi2.isf(alpha2, 2)
    order = np.argsort(np.amin(ic, axis=1))
    j = 0
    z_order = np.empty(2 * len(order), dtype=int)
    z_order[::2] = 2 * order
    z_order[1::2] = 2 * order + 1
    zo = z[z_order]

    ico = ic[order]

    abesto = JCBBrec(zo, zbar, S, alpha1, g2, j, a, ico, abest)

    abest[order] = abesto

    return abest
Code Example #37
 def z_score(self):
     '''
         Lazy compute unsigned z-score
     '''
     if self.z_scr is None:
         self.z_scr = math.sqrt(chi2.isf(self.pval, df=1))
     return self.z_scr
Code Example #38
File: general_likefunc.py Project: bjfar/pysusy
 def CLs2chi2(CLs):
    """This function takes a CLs value and estimates the corresponding
    Delta(Chi^2) value. The justification for this is slightly complex
    (see mathematica notebook CLsVSchi2.nb). We essentially assume the
    CLs values are 1-CL where CL values are the confidence limits
    obtained by the maximum likelihood ratio method, where the test
    statistic is assumed to follow a chi-squared (DOF=1) distribution.
    """
    return chi2.isf(CLs,1) #inverse survival function of the chi^2(DOF=1) distribution.
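For reference, the conversion above at a 95% CLs exclusion reproduces the familiar one-degree-of-freedom threshold:

from scipy.stats import chi2
print(chi2.isf(0.05, 1))  # ~3.84, the usual Delta(chi^2) for 95% CL with 1 DOF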
Code Example #39
 def solve(self):
     r = 3
     c = 2
     free_degree = (r - 1) * (c - 1)
     stat_value = 0
     chi2_value = chi2.isf(0.01, free_degree)
     for i in range(len(self.n_i)):
         for j in range(len(self.n_j)):
             stat_value += 1.0 * (self.n_i_j[i][j] - self.getTij(i, j)) ** 2 / self.getTij(i, j)
     return [round(free_degree, 2), round(stat_value, 2) + 0.01,not stat_value > chi2_value]
Code Example #40
def integral_calc_with_stopping(initial_guess, point_spread, phi_tilde):

    # chi-square critical value (stopping threshold; not applied in this excerpt)
    critical_value = chi2.isf(.05, len(initial_guess))

    z_dist, phi_r = integral_calc2(initial_guess, point_spread, phi_tilde)

    for i in range(100):
        z_dist, phi_r = integral_calc2(z_dist, point_spread, phi_tilde)
Code Example #41
File: kw.py Project: FranciscoMeloJr/Python-Tests
def kruskal_willis(list_a, flag):
    
    vec = []
    list_sum = []
    df = len(list_a) - 1

        
    for each in list_a:
        if flag:
            print(each)
        sum_ = 0
        i = 0
        for each_number in each:
            vec.append(each_number)
            sum_ += each_number

        
    #create rank:
    rank = sorted(vec)
    
    #new list with ranks:
    temp_total =[]
    for each in list_a:
        temp = []

        for each_item in each:
            i = 0
            temp.append(find_number(each_item, rank, flag))
        temp_total.append(temp)

    for each in temp_total:
        list_sum.append(calculate_total(each, flag))
        
    print("total:")
    print(temp_total)
    print("sum:")
    print(list_sum)
    
    if flag:
        print(rank)
        print(list_sum)
        print(df)
        
    h = calculate_h(12, 18, list_sum, flag)
    #compare with qui square:
    p = 0.05
    df = len(list_a)-1
    ch_value =  chi2.isf(p, df)

    return evaluate_chi(ch_value, h)
Code Example #42
File: MIT.py Project: regulomics/bnfinder
    def graph_score(self,number_of_potential_parents,gene_vertex,weights_of_parents,number_of_data_points): # g(Pa)
        
        # chisquare
        #print list_of_parents
            
#        sorted_parents = sorted(list_of_parents, key=lambda gene: gene.n_disc,reverse=True)
        #print "parent.size: ", len(list_of_parents)
        sum_of_l_values = 0.0
        
        for i,wi in enumerate(weights_of_parents):            
            l_i_sigma = (gene_vertex.base_weight() - 1)*(wi - 1)
            for w in weights_of_parents[:i]:
                l_i_sigma *= w
            chisquare_value = chi2.isf(1-self.alpha, l_i_sigma)
            #print "chisquare:", chisquare_value
            sum_of_l_values += chisquare_value

        #print [x.name for x in list_of_parents],gene_vertex.name
        #print "g_score: ", sum_of_l_values
        return sum_of_l_values    
Code Example #43
File: biweight.py Project: EiffL/python_lib
def bwt_ave(x):

    x_median = np.median(x)
    x_mad = np.median(np.abs(x - np.median(x)))

    bwt_ave = 0.0
    while np.around(np.abs(bwt_ave - x_median), 8) > 0:
        bwt_ave, bwt_std = iter_bwt(x, x_median, x_mad)
        x_median = bwt_ave

    chi2_68_left = chi2.ppf(0.32 / 2.0, len(x) - 1)
    chi2_68_right = chi2.isf(0.32 / 2.0, len(x) - 1)
    t_68 = t.isf(0.32 / 2.0, int(0.7 * (len(x) - 1)))

    bwt_ave_low = bwt_ave + t_68 * bwt_std / np.sqrt(len(x))
    bwt_ave_up = bwt_ave - t_68 * bwt_std / np.sqrt(len(x))
    bwt_std_low = (np.sqrt((len(x) - 1) / chi2_68_left) - 1.0) * bwt_std
    bwt_std_up = (np.sqrt((len(x) - 1) / chi2_68_right) - 1.0) * bwt_std

    return (bwt_ave, bwt_ave_low, bwt_ave_up), (bwt_std, bwt_std_low, bwt_std_up)
Code Example #44
File: generator.py Project: antiface/flying-gaussian
def draw_cov_ellipse(centroid, cov_matrix, ax, 
                     perc=0.95, color='b'):
    """Draw the ellipse associated with the multivariate normal distribution
    defined by *centroid* and *cov_matrix*. The *perc* argument specified 
    the percentage of the distribution mass that will be drawn.

    This function is based on the example posted on Matplotlib mailing-list:
    http://www.mail-archive.com/[email protected]/msg14153.html
    """
    U, s, _ = linalg.svd(cov_matrix)
    orient = atan2(U[1, 0], U[0, 0]) * 180.0/pi

    c = chi2.isf(1 - perc, len(centroid))
    width = 2.0 * sqrt(s[0] * c)
    height = 2.0 * sqrt(s[1] * c)

    ellipse = Ellipse(xy=centroid, width=width, height=height, 
                      angle=orient, fc=color, alpha=0.1)
    
    return ax.add_patch(ellipse)
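The key step above is the chi-square quantile: for a d-dimensional Gaussian, chi2.isf(1 - perc, d) is the squared Mahalanobis radius enclosing a fraction perc of the mass, and the ellipse axes scale with the square roots of the covariance eigenvalues. A self-contained sketch with an assumed covariance matrix:

import numpy as np
from scipy.stats import chi2

cov = np.array([[2.0, 0.3],
                [0.3, 1.0]])       # illustrative 2-D covariance
c = chi2.isf(1 - 0.95, 2)          # ~5.99 for a 95% mass ellipse
eigvals, _ = np.linalg.eigh(cov)
print(c, 2.0 * np.sqrt(eigvals * c))  # full axis lengths, as in draw_cov_ellipse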
Code Example #45
def _confmean(alpha, nanrobust, axis=None, ci=0.95, w=None, d=0):
    if w is None:
        w = np.ones(alpha.shape)

    # compute ingredients for conf. lim.
    if nanrobust:
        r = nanresvec(alpha, axis=axis, w=w, d=d)
    else:
        r = resvec(alpha, axis=axis, w=w, d=d)
    n = np.sum(w, axis=axis)
    R = n * r
    c2 = chi2.isf(1-ci, 1)

    # check for resultant vector length and select appropriate formula
    t = np.zeros(r.shape)

    tscalar = np.isscalar(r)
    if tscalar:
        r = np.array([r])
        n = np.array([n])
        R = np.array([R])
        t = np.array([t])

    # fill in values
    i = (r < 0.9) & (r > np.sqrt(c2 / 2. / n))
    t[i] = np.sqrt((2 * n[i] * (2 * R[i]**2 - n[i] * c2)) / (4 * n[i] - c2))

    j = r >= 0.9
    t[j] = np.sqrt(n[j] ** 2 - (n[j] ** 2 - R[j] ** 2) * np.exp(c2 / n[j]))

    t[~(i | j)] = np.nan

    # apply final transform
    t = np.arccos(t / R)
    if tscalar:
        t = t[0]

    return t
Code Example #46
File: trout.py Project: tiagoantao/AgeStructureNe
def patch_ci(nindivs, r2, sr2, j, cut=0.025):
    b = chi2.isf(cut, j)
    t = chi2.isf(1 - cut, j)
    r2b, r2t = j * r2 / b, j * r2 / t
    return get_ldne(nindivs, sr2, r2b), get_ldne(nindivs, sr2, r2t)
Code Example #47
md2s_sorted = sorted(map(lambda a: a * a, mds))

mds_sorted = sorted(mds)

chi2_quantiles = []

for i in range(len(mds_sorted)):
    j = i + 1.0
    n = len(mds_sorted)
    q_j = (j - 0.5) / n
    x2 = chi2.isf(1.0 - q_j, 3)
    x2_sr = math.sqrt(x2)
    chi2_quantiles.append(x2_sr)

# Plot 1.

plt.figure(1)
plt.scatter(chi2_quantiles, mds_sorted, color="green", alpha=0.5)
plt.title("Real Robot Forward Motion Q-Q Plot")
plt.xlabel("Square Root of Chi-square Probability Quantile")
plt.ylabel("Ordered Mahalanobis Distance Quantile")
plt.plot([min(chi2_quantiles), max(chi2_quantiles)], [min(chi2_quantiles), max(chi2_quantiles)], color="red", alpha=0.5)
plt.plot( 
Code Example #48
# -*- coding: utf-8 -*-
#

from scipy.stats import chi2, t, f
import numpy as np

# Q1
q1_1 = chi2.isf(q=0.95, df=4)
assert np.allclose(q1_1, 0.710723)
q1_2 = chi2.isf(q=0.05, df=4)
assert np.allclose(q1_2, 9.48773)
q1_3 = chi2.isf(q=0.95, df=9)
assert np.allclose(q1_3, 3.32511)
q1_4 = chi2.isf(q=0.05, df=9)
assert np.allclose(q1_4, 16.9190)

# Q2
q2_1 = t.isf(q=0.05, df=7)
assert np.allclose(q2_1, 1.895, rtol=1.e-3)
q2_2 = t.isf(q=0.025, df=7)
assert np.allclose(q2_2, 2.365, rtol=1.e-3)
q2_3 = t.isf(q=0.05, df=12)
assert np.allclose(q2_3, 1.782, rtol=1.e-3)
q2_4 = t.isf(q=0.025, df=12)
assert np.allclose(q2_4, 2.179, rtol=1.e-3)

# Q3
q3_1 = f.isf(q=0.05, dfn=5, dfd=7)
assert np.allclose(q3_1, 3.9715)
q3_2 = f.isf(q=0.95, dfn=5, dfd=7)
assert np.allclose(q3_2, 0.2050903422957813)  # inverse of F(7,5; 0.05)
Code Example #49
def estimate_confidence_intervals(label_model,significance=0.95,perturbation=0.1,min_absolute_perturbation=0.1,max_absolute_perturbation=25,parameter_precision=None,best_parameter_dict=None,evaluate_turnovers=False,parameter_list=None,fraction_of_optimum=0.9,force_flux_value_bounds=False,relative_max_random_sample=0.5, relative_min_random_sample= 0.25,annealing_n=50,annealing_m=100,annealing_p0=0.4,annealing_pf=0.001,annealing_n_processes=1,annealing_cycle_time_limit=1800, annealing_cycle_max_attempts=5,annealing_iterations=2,annealing_restore_parameters=True,fname="confidence.json",sbml_name=None,output=True):
   """
   Computes the confidence intervals for fluxes 
   
   label_model: label_model object
   significance: float
        Significance level for the confidence intervals. Default is 0.95 (95%)
   perturbation: float
        Relative perturbation for each parameter at each step. Default is 0.1 (10%). Regardless of this value, the absolute perturbation will never be lower than the value defined by min_absolute_perturbation and will never be larger than the maximum defined by max_absolute_perturbation
   min_absolute_perturbation: float
	See above
   max_absolute_perturbation: float
	See above
   parameter_precision: float,
        Defines the precision of the flux value parameters. If none is defined the precision defined in the label_model object will be used
   best_parameter_dict: dict
	Dict with the best parameters that have been obtained after fitting the parameters to experimental data
   evaluate_turnovers: bool,
        If set to False (default) it will not calculate the confidence intervals for turnovers. 
   parameter_list: list
	List of the parameters that should be evaluated. Unless all flux value parameters are selected, the confidence intervals for fluxes (other than those directly analyzed) won't be meaningful
   fraction_of_optimum: float
        Fraction of the objective flux that should be maintained. If the parameters have been added automatically this will have no effect, as the objective is always added as a parameter
   force_flux_value_bounds: bool,
        If False it will ignore the bounds defined in the parameter dict and use the FVA limits for flux value parameters. If set to True it might in some instances result in infeasible solutions
   relative_max_random_sample: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   relative_min_random_sample: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters 
   annealing_n: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters   
   annealing_m: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   annealing_p0: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   annealing_pf: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   annealing_n_processes: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   annealing_cycle_time_limit: float
   	Defines the parameter of the same name in the annealing function used to reoptimize the parameters
   annealing_cycle_max_attempts
   annealing_iterations: int
        Number of times annealing should be run once the significance threshold has been surpassed by a parameter, to ensure this value is the real upper/lower limit for the parameter
   annealing_restore_parameters: bool:
        If True, after each annealing iteration the parameters are returned to their original values to reduce the risk of being trapped in local minima
   fname: string:
        Name of the file where the results will be saved. It must have either an xlsx, CSV or json extension.
   sbml_name: string
        Name of the SBML file that will be generated containing the constrained_model restricted by the confidence interval results. If None (default) no SBML will be generated.
   output: bool
        If True it will indicate progress of the analysis in a text file named estimate_confidence_interval_output.txt
   """
   if parameter_precision==None:
      parameter_precision=label_model.parameter_precision
   if best_parameter_dict==None:
      best_parameter_dict=label_model.parameter_dict
   print(parameter_list)
   if parameter_list==None or parameter_list==[]:
      full_mode=True
      if evaluate_turnovers:
         parameter_list=best_parameter_dict.keys()  
      else:
         parameter_list=[] 
         for x in best_parameter_dict:
             if best_parameter_dict[x]["type"] != "turnover":
                parameter_list.append(x) 
   else:
      full_mode=False
   precision=int(-1*(math.log10(parameter_precision)))
   max_random_sample=int(relative_max_random_sample*len(best_parameter_dict))
   min_random_sample=int(relative_min_random_sample*len(best_parameter_dict))
   #chi_parameters_sets_dict={}
   parameter_confidence_interval_dict={}
   flux_confidence_interval_dict={}
   parameter_value_parameters_sets_dict={}
   build_flux_confidence_interval_dict(label_model,flux_confidence_interval_dict,parameter_list)
   build_confidence_dicts(parameter_confidence_interval_dict,parameter_value_parameters_sets_dict,best_parameter_dict)
   """for flux in label_model.flux_dict:
       flux_confidence_interval_dict[flux]={"lb":label_model.flux_dict[flux],"ub":label_model.flux_dict[flux]}
       if (flux+"_reverse") in label_model.flux_dict:
          net_flux=label_model.flux_dict[flux]-label_model.flux_dict[flux+"_reverse"]
          flux_confidence_interval_dict["net_"+flux]={"lb":net_flux,"ub":net_flux}"""
   print(flux_confidence_interval_dict)
   apply_parameters(label_model,best_parameter_dict,parameter_precision=parameter_precision)
   a,b=solver(label_model)
   best_objective,b,c=get_objective_function(label_model,output=False)
   delta_chi = chi2.isf(q=1-significance, df=1)
   signficance_threshold=delta_chi+best_objective
   print(signficance_threshold)
   if output:
      with open("estimate_confidence_interval_output.txt", "a") as myfile:
           myfile.write("signficance_threshold "+str(signficance_threshold)+"\n")
   original_objectives_bounds={}
   for reaction in label_model.constrained_model.objective: 
              original_objectives_bounds[reaction.id]={}
              original_objectives_bounds[reaction.id]["lb"]=reaction.lower_bound
              original_objectives_bounds[reaction.id]["ub"]=reaction.upper_bound
              original_objectives_bounds[reaction.id]["obj_coef"]=reaction.objective_coefficient
              fva=flux_variability_analysis(label_model.constrained_model,reaction_list=[reaction], fraction_of_optimum=fraction_of_optimum,tolerance_feasibility=label_model.lp_tolerance_feasibility)
              reaction.lower_bound=max(round_down(fva[reaction.id]["minimum"],precision),reaction.lower_bound)
              reaction.upper_bound=min(round_up(fva[reaction.id]["maximum"],precision),reaction.upper_bound)
              reaction.objective_coefficient=0
   
   flux_parameter_list=[]
   
   for parameter in best_parameter_dict:
       if best_parameter_dict[parameter]["type"]=="flux value":
          flux_parameter_list.append(parameter)
   feasability_process = Pool(processes=1)
   for parameter in parameter_list:
       apply_parameters(label_model,best_parameter_dict,parameter_precision=parameter_precision)
       a,b=solver(label_model)
       #chi_parameters_sets_dict[parameter]={}
       #variation_range= best_parameter_dict[parameter]["ub"]-best_parameter_dict[parameter]["lb"] 
       #Find the highest/lowest value found on previous simulations
       n=1
       sign=1
       #is_flux_value=parameter in flux_parameter_list
       if parameter not in best_parameter_dict:
          additional_parameter=True 
          parameter_dict=max_parameter_dict=min_parameter_dict=copy.deepcopy(best_parameter_dict)
          if parameter in label_model.constrained_model.reactions:
             print("is not ratio")
             value=label_model.constrained_model.solution.x_dict[parameter]
             reaction=label_model.constrained_model.reactions.get_by_id(parameter)
             lb=reaction.lower_bound
             ub=reaction.upper_bound
             parameter_dict[parameter]={"v":value,"lb":lb,"ub":ub ,"type":"flux value","reactions":[parameter],"max_d":0.1,"original_lb":lb,"original_ub":ub,"original_objective_coefficient":0.0}
          elif "/" in parameter:
             print("is ratio")
             reaction1=parameter.split("/")[0]
             reaction2=parameter.split("/")[1]  
             value,lb,ub=get_ratios_bounds(label_model,parameter,0.1,lp_tolerance_feasibility=label_model.lp_tolerance_feasibility,parameter_dict=parameter_dict)
             parameter_dict[parameter]={"v":value,"lb":lb,"ub":ub ,"type":"ratio","ratio":{reaction1:"v",reaction2:1},"max_d":0.1}
             print(parameter)
       else:
          additional_parameter=False 
          #TODO Make it so it can start from the highuest and lowest value of the parameter found in previous simulations   
          min_parameter_dict=parameter_value_parameters_sets_dict[parameter]["lb_parameter_dict"]
          max_parameter_dict=parameter_value_parameters_sets_dict[parameter]["ub_parameter_dict"]
       parameter_lb,parameter_ub=get_bounds(label_model,min_parameter_dict,parameter,force_flux_value_bounds,flux_parameter_list)
       """lb_list=[]#[best_parameter_dict[parameter]["lb"]]
       ub_list=[]#[best_parameter_dict[parameter]["ub"]]
       if is_flux_value==True:
          clear_parameters(label_model,parameter_dict=best_parameter_dict,parameter_list=flux_parameter_list, clear_ratios=False,clear_turnover=False,clear_fluxes=True,restore_objectives=False) #Clear all parameters
          #Get real upper and lower bound for the parameters
          for reaction_id in best_parameter_dict[parameter]["reactions"]: 
                fva=flux_variability_analysis(label_model.constrained_model,fraction_of_optimum=0,reaction_list=[reaction_id],tolerance_feasibility=label_model.lp_tolerance_feasibility)
                lb_list.append(fva[reaction_id]["minimum"])
                ub_list.append(fva[reaction_id]["maximum"])
       if is_flux_value==False or force_flux_value_bounds:
          lb_list.append(best_parameter_dict[parameter]["lb"])
          ub_list.append(best_parameter_dict[parameter]["ub"])
       parameter_lb=max(lb_list)
       parameter_ub=min(ub_list)"""
       if output:
          with open("estimate_confidence_interval_output.txt", "a") as myfile:
               myfile.write("///////"+parameter+"(lb="+str(parameter_lb)+" ub="+str(parameter_ub)+ ")\n")
       while(n<=100000):
          stop_flag=False
          if n==1:
             parameter_dict=copy.deepcopy(max_parameter_dict)
             #Run a quick evaluation of the upper bound to see if it is not necessary to "walk there" 
             parameter_dict,f_best=evaluate_parameter(label_model,parameter_dict,flux_parameter_list,parameter,parameter_lb,parameter_ub,parameter_ub,signficance_threshold,feasability_process,parameter_precision,max_absolute_perturbation/10.0,force_flux_value_bounds,max(int(annealing_n*0.5),2),annealing_m,annealing_p0,annealing_pf,max_random_sample,min_random_sample,annealing_n_processes,annealing_cycle_time_limit, annealing_cycle_max_attempts,annealing_iterations=1,annealing_restore_parameters=annealing_restore_parameters)
             if f_best<=signficance_threshold:
                build_flux_confidence_interval_dict(label_model,flux_confidence_interval_dict,parameter_list)
                parameter_dict_to_store=copy.deepcopy(parameter_dict)
                if additional_parameter:
                   del parameter_dict_to_store[parameter]
                build_confidence_dicts(parameter_confidence_interval_dict,parameter_value_parameters_sets_dict,parameter_dict_to_store)
                
             else:
                parameter_dict=copy.deepcopy(max_parameter_dict)
             if output:
                with open("estimate_confidence_interval_output.txt", "a") as myfile:
                     myfile.write(parameter+" "+"v="+str(parameter_ub)+" chi="+str(f_best)+"\n")
          delta_parameter=min(max(perturbation*abs(parameter_dict[parameter]["v"]),min_absolute_perturbation),max_absolute_perturbation)
          print(delta_parameter)
          parameter_new_value=max(min(parameter_dict[parameter]["v"]+delta_parameter*sign,parameter_ub),parameter_lb)
          parameter_dict,f_best=evaluate_parameter(label_model,parameter_dict,flux_parameter_list,parameter,parameter_lb,parameter_ub,parameter_new_value,signficance_threshold,feasability_process,parameter_precision,max_absolute_perturbation/10.0,force_flux_value_bounds,annealing_n,annealing_m,annealing_p0,annealing_pf,max_random_sample,min_random_sample,annealing_n_processes,annealing_cycle_time_limit, annealing_cycle_max_attempts,annealing_iterations,annealing_restore_parameters=annealing_restore_parameters) 
          if output:
             with open("estimate_confidence_interval_output.txt", "a") as myfile:
               myfile.write(parameter+" "+"v="+str(parameter_new_value)+" chi="+str(f_best)+"\n")
          
          if f_best>signficance_threshold:
             stop_flag=True
          else: 
             if f_best<best_objective: #If a solution is found that is better than the optimal solution restart the confidence interval simulation with the new parameter set
                parameter_dict_to_store=copy.deepcopy(parameter_dict)
                if additional_parameter:
                   clear_parameters(label_model,parameter_dict=parameter_dict,parameter_list=[parameter], clear_ratios=True,clear_turnover=False,clear_fluxes=True,restore_objectives=False) #
                   del parameter_dict_to_store[parameter]
                parameter_confidence_interval_dict={}
                flux_confidence_interval_dict={}
                parameter_value_parameters_sets_dict={}
                if output:
                   with open("estimate_confidence_interval_output.txt", "a") as myfile:
                        myfile.write("Restarting analysis with new bestfit\n")
                best_parameter_dict,best_flux_dict,f_best=annealing(label_model,n=annealing_n,m=annealing_m,p0=annealing_p0,pf=annealing_pf,max_random_sample=max_random_sample,min_random_sample=min_random_sample,mode="fsolve",fraction_of_optimum=0,parameter_precision=parameter_precision,parameter_to_be_fitted=[],max_perturbation=max_absolute_perturbation,gui=None,fba_mode="fba", break_threshold=signficance_threshold,parameter_dict=parameter_dict_to_store,n_processes=annealing_n_processes,cycle_time_limit=annealing_cycle_time_limit, cycle_max_attempts=annealing_cycle_max_attempts,output=False,force_flux_value_bounds=force_flux_value_bounds)
                if full_mode:
                   parameter_list=None
                parameter_confidence_interval_dict,flux_confidence_interval_dict,parameter_value_parameters_sets_dict,constrained_model=estimate_confidence_intervals(label_model,significance=significance,perturbation=perturbation,min_absolute_perturbation=min_absolute_perturbation, max_absolute_perturbation=max_absolute_perturbation ,parameter_precision=parameter_precision, best_parameter_dict=best_parameter_dict ,parameter_list=parameter_list ,fraction_of_optimum=fraction_of_optimum ,force_flux_value_bounds=force_flux_value_bounds ,relative_max_random_sample=relative_max_random_sample, relative_min_random_sample= relative_min_random_sample,annealing_n=annealing_n,annealing_m=annealing_m,annealing_p0=annealing_p0,annealing_pf=annealing_pf,annealing_n_processes=annealing_n_processes,annealing_cycle_time_limit=annealing_cycle_time_limit, annealing_cycle_max_attempts= annealing_cycle_max_attempts, annealing_iterations=annealing_iterations ,annealing_restore_parameters=annealing_restore_parameters ,fname=fname,output=output,sbml_name=sbml_name,evaluate_turnovers=evaluate_turnovers)
                #parameter_confidence_interval_dict,flux_confidence_interval_dict,parameter_value_parameters_sets_dict =estimate_confidence_intervals(label_model,significance=significance,perturbation=perturbation,min_absolute_perturbation=min_absolute_perturbation,max_absolute_perturbation=max_absolute_perturbation,parameter_precision=parameter_precision,best_parameter_dict=parameter_dict,parameter_list=parameter_list,fraction_of_optimum=fraction_of_optimum,force_flux_value_bounds=force_flux_value_bounds,relative_max_random_sample=relative_max_random_sample, relative_min_random_sample= relative_min_random_sample,annealing_n=annealing_n,annealing_m=annealing_m,annealing_p0=annealing_p0,annealing_pf=annealing_pf,output=output,annealing_n_processes=annealing_n_processes,annealing_cycle_time_limit=annealing_cycle_time_limit, annealing_cycle_max_attempts=annealing_cycle_max_attempts,annealing_iterations=annealing_iterations,annealing_restore_parameters=annealing_restore_parameters,fname=fname)
                return parameter_confidence_interval_dict,flux_confidence_interval_dict,parameter_value_parameters_sets_dict,constrained_model
             if parameter_dict[parameter]["v"]<=parameter_lb or parameter_dict[parameter]["v"]>=parameter_ub:
                stop_flag=True
             """if sign==1:
                parameter_confidence_interval_dict[parameter]["ub"]=new_value
             else:
                parameter_confidence_interval_dict[parameter]["lb"]=new_value"""
             build_flux_confidence_interval_dict(label_model,flux_confidence_interval_dict,parameter_list)
             parameter_dict_to_store=copy.deepcopy(parameter_dict)
             if additional_parameter:
                del parameter_dict_to_store[parameter]
             build_confidence_dicts(parameter_confidence_interval_dict,parameter_value_parameters_sets_dict,parameter_dict_to_store)
          if stop_flag==True:
            print("stop")
            if sign==1:
                sign=-1
                parameter_dict=copy.deepcopy(min_parameter_dict)
                parameter_dict,f_best=evaluate_parameter(label_model,parameter_dict,flux_parameter_list,parameter,parameter_lb,parameter_ub,parameter_lb,signficance_threshold,feasability_process,parameter_precision,max_absolute_perturbation/10.0,force_flux_value_bounds,annealing_n,annealing_m,annealing_p0,annealing_pf,max_random_sample,min_random_sample,annealing_n_processes,annealing_cycle_time_limit, annealing_cycle_max_attempts,annealing_iterations=1,annealing_restore_parameters=annealing_restore_parameters)
                if f_best<=signficance_threshold:
                    build_flux_confidence_interval_dict(label_model,flux_confidence_interval_dict,parameter_list)
                    parameter_dict_to_store=copy.deepcopy(parameter_dict)
                    if additional_parameter:
                       del parameter_dict_to_store[parameter]
                    build_confidence_dicts(parameter_confidence_interval_dict,parameter_value_parameters_sets_dict,parameter_dict_to_store)
                else:
                    parameter_dict=copy.deepcopy(min_parameter_dict)
                if output:
                   with open("estimate_confidence_interval_output.txt", "a") as myfile:
                        myfile.write(parameter+" "+"v="+str(parameter_lb)+" chi="+str(f_best)+"\n")
                
            else:
                clear_parameters(label_model,parameter_dict=parameter_dict,parameter_list=[parameter], clear_ratios=True,clear_turnover=False,clear_fluxes=True,restore_objectives=False) #Clear all parameters
                break
          n+=1
          print(["n", n])
          
         
   for reaction_id in original_objectives_bounds:
            reaction=label_model.constrained_model.reactions.get_by_id(reaction_id)
            reaction.lower_bound=original_objectives_bounds[reaction_id]["lb"]
            reaction.upper_bound=original_objectives_bounds[reaction_id]["ub"]
            reaction.objective_coefficient=original_objectives_bounds[reaction_id]["obj_coef"]
   #apply_parameters(label_model,best_parameter_dict,parameter_precision=parameter_precision)
   feasability_process.close()
   if "xlsx" in fname or "csv" in fname: 
      print([full_mode])
      if not full_mode:
         save_flux_confidence_interval(label_model,flux_confidence_interval_dict,significance=significance,fn=fname,omit_turnovers=not evaluate_turnovers,parameter_list=parameter_list)
      else:
        save_flux_confidence_interval(label_model,flux_confidence_interval_dict,significance=significance,fn=fname,omit_turnovers=not evaluate_turnovers,parameter_list=None)   
   elif "json" in  fname:
      save_confidence_interval_json(flux_confidence_interval_dict,parameter_confidence_interval_dict,fn=fname)
   constrained_model=save_sbml_with_confidence_results(label_model,flux_confidence_interval_dict,fname=sbml_name,parameter_dict=best_parameter_dict,full_mode=full_mode,parameter_list=parameter_list,precision=precision)     
   apply_parameters(label_model,best_parameter_dict,parameter_precision=parameter_precision)
   return parameter_confidence_interval_dict,flux_confidence_interval_dict,parameter_value_parameters_sets_dict,constrained_model
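
The loop above walks each parameter up and then down from its best-fit value until the chi-square objective exceeds signficance_threshold, which the function receives precomputed. A minimal sketch of how such a profile-likelihood cut-off is commonly derived with chi2.isf is shown below; the names best_objective and significance are illustrative and not taken from this example.

# Minimal sketch, assuming the usual profile-likelihood rule: accept parameter
# values whose objective stays within a chi-square quantile of the best fit.
# best_objective and significance are illustrative names, not from the code above.
from scipy.stats import chi2

def confidence_threshold(best_objective, significance=0.95, df=1):
    # df=1 because a single parameter is profiled at a time
    return best_objective + chi2.isf(1.0 - significance, df)

# Example: best-fit objective 12.3 at 95% confidence -> 12.3 + 3.84 = 16.14
print(confidence_threshold(12.3))
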
Code Example #50
File: significance.py Project: cdeil/gammapy
def convert_likelihood(to, probability=None, significance=None,
                       ts=None, chi2=None, df=None):
    """Convert between various equivalent likelihood measures.

    TODO: don't use ``chi2`` with this function at the moment ...
    I forgot that one also needs the number of data points to
    compute ``ts``:
    http://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test#Calculating_the_test-statistic
    Probably it's best to split this out into a separate function
    or just document how users should compute ``ts`` before calling this
    function if they have ``chi2``.


    This function uses the ``sf`` and ``isf`` methods of the
    `~scipy.stats.norm` and `~scipy.stats.chi2` distributions
    to convert between various equivalent ways to quote a likelihood.

    - ``sf`` means "survival function", which is the "tail probability"
      of the distribution and is defined as ``1 - cdf``, where ``cdf``
      is the "cumulative distribution function".
    - ``isf`` is the inverse survival function.

    The relation between the quantities can be summarised as:

    - significance <-- normal distribution ---> probability
    - probability <--- chi2 distribution with df ---> ts
    - ts = chi2 / df

    So supporting both ``ts`` and ``chi2`` in this function is redundant;
    it is kept as a convenience for users that have a ``ts`` value from
    a Poisson likelihood fit or a ``chi2`` value from a chi-square fit.

    Parameters
    ----------
    to : {'probability', 'ts', 'significance', 'chi2'}
        Which quantity you want to compute.
    probability, significance, ts, chi2 : array_like
        Input quantity value ... mutually exclusive, pass exactly one!
    df : array_like
        Difference in number of degrees of freedom between
        the alternative and the null hypothesis model.

    Returns
    -------
    value : `numpy.ndarray`
        Output value as requested by the input ``to`` parameter.

    Notes
    -----

    **TS computation**

    Under certain assumptions Wilks' theorem says that the likelihood ratio
    ``TS = 2 (L_alt - L_null)`` follows a chi-square distribution with ``ndf``
    degrees of freedom under the null hypothesis, where
    ``L_alt`` and ``L_null`` are the log-likelihoods of the alternative and null
    hypothesis models and ``ndf`` is the difference in the number of degrees of
    freedom between those models.

    Note that the `~gammapy.stats.cash` statistic already contains the factor 2,
    i.e. you should compute ``TS`` as ``TS = cash_null - cash_alt``.

    - https://en.wikipedia.org/wiki/Chi-squared_distribution
    - http://docs.scipy.org/doc/scipy-dev/reference/generated/scipy.stats.chi2.html
    - https://en.wikipedia.org/wiki/Likelihood-ratio_test
    - https://adsabs.harvard.edu/abs/1979ApJ...228..939C
    - https://adsabs.harvard.edu/abs/2009A%26A...495..989S

    **Physical limits**

    ``probability`` is the one-sided `p-value`, e.g. `significance=3`
    corresponds to `probability=0.00135`.

    TODO: check if this gives correct coverage for cases with hard physical limits,
    e.g. when fitting TS of extended sources vs. point source and in half of the
    cases ``TS=0`` ... I suspect coverage might not be OK and we need to add an
    option to this function to handle those cases!

    Examples
    --------
    Here's some examples how to compute the ``probability`` or ``significance``
    for a given observed ``ts`` or ``chi2``:

    >>> from gammapy.stats import convert_likelihood
    >>> convert_likelihood(to='probability', ts=10, df=2)
    0.0067379469990854679
    >>> convert_likelihood(to='significance', chi2=19, df=7)
    2.4004554920435521

    Here's how to do the reverse, compute the ``ts`` or ``chi2`` that would
    result in a given ``probability`` or ``significance``.

    >>> convert_likelihood(to='ts', probability=0.01, df=1)
    6.6348966010212171
    >>> convert_likelihood(to='chi2', significance=3, df=10)
    28.78498865156606
    """
    from scipy.stats import norm as norm_distribution
    from scipy.stats import chi2 as chi2_distribution

    # ---> Check inputs are OK!
    # ---> This is a function that will be used interactively by end-users,
    # ---> so we want good error messages if they use it incorrectly.

    # Check that the output `to` parameter is valid
    valid_quantities = ['probability', 'ts', 'significance', 'chi2']
    if to not in valid_quantities:
        msg = 'Invalid parameter `to`: {}\n'.format(to)
        msg += 'Valid options are: {}'.format(valid_quantities)
        raise ValueError(msg)

    # Check that the input is valid
    _locals = locals().copy()
    input_values = [_ for _ in valid_quantities
                    if _locals[_] is not None]
    if len(input_values) != 1:
        msg = 'You have to pass exactly one of the valid input quantities: '
        msg += ', '.join(valid_quantities)
        msg += '\nYou passed: '
        if len(input_values) == 0:
            msg += 'none'
        else:
            msg += ', '.join(input_values)
        raise ValueError(msg)

    input_type = input_values[0]
    input_value = locals()[input_type]

    # Check that `df` is given if it's required for the computation
    if any(_ in ['ts', 'chi2'] for _ in [input_type, to]) and df is None:
        msg = 'You have to specify the number of degrees of freedom '
        msg += 'via the `df` parameter.'
        raise ValueError(msg)


    # ---> Compute the requested quantity
    # ---> By now we know the inputs are OK.

    # Compute equivalent `ts` for `chi2` ... after this
    # the code will only handle the `ts` input case,
    # i.e. conversions: significance <-> probability <-> ts
    if chi2 is not None:
        ts = chi2 / df

    # A note that might help you understand the nested if-else-statement:
    # The quantities `probability`, `significance`, `ts` and `chi2`
    # form a graph with `probability` at the center.
    # There might be functions directly relating the other quantities
    # in general or in certain limits, but the computation here
    # always proceeds via `probability` as a one- or two-step process.

    if to == 'significance':
        if ts is not None:
            probability = chi2_distribution.sf(ts, df)
        return norm_distribution.isf(probability)

    elif to == 'probability':
        if significance is not None:
            return norm_distribution.sf(significance)
        else:
            return chi2_distribution.sf(ts, df)

    elif to == 'ts':
        # Compute a probability if needed
        if significance is not None:
            probability = norm_distribution.sf(significance)

        return chi2_distribution.isf(probability, df)

    elif to == 'chi2':
        if ts is not None:
            return df * ts
        # Compute a probability if needed
        if significance is not None:
            probability = norm_distribution.sf(significance)

        return chi2_distribution.isf(probability, df)
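
The conversion chain documented above (ts <-> probability <-> significance) can be reproduced directly with scipy, which may help when reading the if/elif branches; a small check, assuming only that scipy is installed:

# Small check of the conversions above, done directly with scipy.
from scipy.stats import chi2, norm

ts, df = 10, 2
probability = chi2.sf(ts, df)         # ts -> probability (chi-square tail)
significance = norm.isf(probability)  # probability -> significance (one-sided)
ts_back = chi2.isf(probability, df)   # probability -> ts (round trip)

print(probability)   # ~0.00674, matching the docstring example
print(significance)  # ~2.47
print(ts_back)       # ~10.0
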
Code Example #51
File: munge_sumstats.py Project: Genomicsplc/ldsc
def p_to_z(P, N):
    '''Convert P-value and N to standardized beta.'''
    return np.sqrt(chi2.isf(P, 1))
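
Note that N is not used in this helper; the square root of chi2.isf(P, 1) is the absolute Z-score of a two-sided P-value (a chi-square variable with one degree of freedom is the square of a standard normal), so the result agrees with norm.isf(P / 2). A quick check, with P = 0.05 chosen purely for illustration:

# Quick check: sqrt(chi2.isf(P, 1)) equals the two-sided Z-score norm.isf(P/2).
import numpy as np
from scipy.stats import chi2, norm

P = 0.05
print(np.sqrt(chi2.isf(P, 1)))  # ~1.95996
print(norm.isf(P / 2))          # ~1.95996
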
Code Example #52
File: images.py Project: dickwillingale/qsoft
def sqbeam(arr,hbeam,blev,bvar):
    """Analysis of source above background within a square beam

    Args:
        arr:     image array
        hbeam:   half width of square beam in pixels
        blev:    average background level per pixel (to be subtracted)
        bvar:    variance on blev (-ve for counting statistics)

    Returns:
        | list with the following
        | **nx,ny**:    dimension of beam pixels (truncated if falls off edge)
        | **xpi,xpr**:  x output arrays, pixel position and flux
        | **ypi,ypr**:  y output arrays, pixel position and flux
        | **bflux**:    background in beam (e.g. counts)
        | **bsigma**:   standard deviation of background
        | **flux**:     source flux above background in beam (e.g. counts)
        | **fsigma**:   standard deviation of source flux
        | **peak**:     source x,y peak position
        | **cen**:      source x,y centroid position
        | **rmsx**:     rms width in x (pixels) about centroid
        | **rmsy**:     rms width in y (pixels) about centroid
        | **pi5**:      5% x,y position
        | **pi25**:     25% x,y position
        | **med**:      median (50%) x,y position
        | **pi75**:     75% x,y position
        | **pi95**:     95% x,y position
        | **hewx**:     HEW (half energy width) x (pixels)
        | **hewy**:     HEW (half energy width) y (pixels)
        | **w90x**:     W90 (90% width) x (pixels)
        | **w90y**:     W90 (90% width) y (pixels)
        | **fitx**:     parameters from x profile fit using king_profile() 
        | **fity**:     parameters from y profile fit using king_profile() 
    |
    | Fits performed if bvar!=0.
    | Parameters are saved in the lists fitx and fity
    |   0: peak value (no error range calculated)
    |   1: peak X or Y pixel position (no error range calculated)
    |   2: Lorentzian width including 90% upper and lower bounds
    |   3: power index (1 for Lorentzian)
    | The position of the sqbeam is the current position within the image.
    | Use function setpos() to set the current position.
    """
    npp=1000
    a=imagesfor.qri_sqbeam(arr,hbeam,blev,bvar,npp)
    b=bdata()
    b.xpi=a[0]
    b.xpr=a[1]
    b.ypi=a[2]
    b.ypr=a[3]
    b.buf=a[4]
    b.nx=a[5]
    b.ny=a[6]
    b.bflux=a[7]
    b.bsigma=a[8]
    b.flux=a[9]
    b.fsigma=a[10]
    b.peak=a[11]
    b.cen=a[12]
    b.rmsx=a[13]
    b.rmsy=a[14]
    b.pi5=a[15]
    b.pi25=a[16]
    b.med=a[17]
    b.pi75=a[18]
    b.pi95=a[19]
    b.hewx=a[20]
    b.hewy=a[21]
    b.w90x=a[22]
    b.w90y=a[23]
    b.xpi=b.xpi[0:b.nx]
    b.xpr=b.xpr[0:b.nx]
    b.ypi=b.ypi[0:b.ny]
    b.ypr=b.ypr[0:b.ny]
    if bvar!=0:
        delstat= chi2.isf(0.1,4)
        derr=np.array([False,False,False,False])
        def xchisq(fpars):
            #xm=lorentzian(b.xpi,fpars)
            #xm=gaussian(b.xpi,fpars)
            xm=king_profile(b.xpi,fpars)
            return np.sum(np.square(b.xpr-xm)/bvar/b.ny)
        pval=np.amax(b.xpr[0:b.nx])
        spars=np.array([pval,b.med[0],b.hewx/2.,1.0])
        lpars=np.array([pval/2,b.cen[0]-b.hewx/2,b.hewx/2.,0.3])
        upars=np.array([pval*2,b.cen[0]+b.hewx/2,b.hewx*2.,3.0])
        b.fitx=srchmin(spars,lpars,upars,xchisq,delstat,derr)
        def ychisq(fpars):
            #ym=lorentzian(b.ypi,fpars)
            #ym=gaussian(b.ypi,fpars)
            ym=king_profile(b.ypi,fpars)
            return np.sum(np.square(b.ypr-ym)/bvar/b.nx)
        pval=np.amax(b.ypr[0:b.ny])
        spars=np.array([pval,b.med[1],b.hewy/2.,1.0])
        lpars=np.array([pval/2,b.cen[1]-b.hewy/2,b.hewy/2.,0.3])
        upars=np.array([pval*2,b.cen[1]+b.hewy/2,b.hewy*2.,3.0])
        b.fity=srchmin(spars,lpars,upars,ychisq,delstat,derr)
    else:
        b.fitx=False
        b.fity=False
    return b
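
king_profile() is called by the fits above but its definition is not included in this excerpt. Judging from the parameter lists (peak value, peak position, width, power index) and the analogous one-dimensional factors in quafun() in the lecbeam example further below, a plausible form is sketched here; this is an assumption about the profile shape, not the qsoft source.

# Hedged sketch of a King profile (modified Lorentzian), assuming the
# parameter order used above: [peak, position, width G, power index alpha].
# It mirrors the 1-D factors of quafun() below; it is not the qsoft implementation.
import numpy as np

def king_profile_sketch(x, p):
    peak, x0, width, alpha = p
    return peak / np.power(1.0 + np.square((x - x0) * 2.0 / width), alpha)
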
Code Example #53
File: images.py Project: dickwillingale/qsoft
def beam(arr,rbeam,blev,bvar):
    """Analysis of source above background within a circular beam

    Args:
        arr:     image array
        rbeam:   radius of beam in pixels
        blev:    average background level per pixel (to be subtracted)
        bvar:    variance on blev (-ve for counting statistics)

    Returns:
        | list with the following
        | **nsam**:    number of pixels in beam
        | **bflux**:   background in beam (e.g. counts)
        | **bsigma**:  standard deviation of background
        | **flux**:    source flux above background in beam (e.g. counts)
        | **fsigma**:  standard deviation of source flux
        | **peak**:    source x,y peak position
        | **cen**:     source x,y centroid position
        | **tha**:     angle (degrees) of major axis wrt x (x to y +ve)
        | **rmsa**:    source max rms width (major axis) (pixels)
        | **rmsb**:    source min rms width (minor axis) (pixels)
        | **fwhm**:    full width half maximum (pixels) about beam centre
        | **hew**:     half energy width (pixels) about beam centre
        | **w90**:     W90 (90% width) (pixels) about beam centre
        | **fwhmp**:   full width half maximum (pixels) about peak
        | **hewp**:    half energy width (pixels) about peak
        | **w90p**:    W90 (90% width) (pixels) about peak
        | **fwhmc**:   full width half maximum (pixels) about centroid
        | **hewc**:    half energy width (pixels) about centroid
        | **w90c**:    W90 (90% width) (pixels) about centroid
        | **fit**:     parameters from peak fit using peakchisq() 
    |
    | Fit performed if bvar!=0.
    | Parameters are saved in the list fit (see function srchmin())
    |   0:      peak value (no error range calculated)
    |   1:      peak X pixel position with 90% error range
    |   2:      peak Y pixel position with 90% error range
    |   3:      Lorentzian width including 90% upper and lower bounds
    |
    | The position of the beam is the current position within the image.
    | Use function setpos() to set the current position.
    """
    a=imagesfor.qri_beam(arr,rbeam,blev,bvar)
    b=bdata()
    b.nsam=a[0]
    b.bflux=a[1]
    b.bsigma=a[2]
    b.flux=a[3]
    b.fsigma=a[4]
    b.peak=a[5]
    b.cen=a[6]
    b.tha=a[7]
    b.rmsa=a[8]
    b.rmsb=a[9]
    b.fwhm=a[10]
    b.hew=a[11]
    b.w90=a[12]
    b.fwhmp=a[13]
    b.hewp=a[14]
    b.w90p=a[15]
    b.fwhmc=a[16]
    b.hewc=a[17]
    b.w90c=a[18]
    if bvar!=0:
        delstat= chi2.isf(0.1,4)
        iix=int(np.rint(b.cen[0]))
        iiy=int(np.rint(b.cen[1]))
        pval=arr[iix,iiy]
        spars=np.array([pval,b.cen[0],b.cen[1],b.fwhmc/2.])
        lpars=np.array([pval/2,b.cen[0]-b.fwhmc/2,
                        b.cen[1]-b.fwhmc/2,b.fwhmc/2./2])
        upars=np.array([pval*2,b.cen[0]+b.fwhmc/2,
                        b.cen[1]+b.fwhmc/2,b.fwhmc/2.*2])
        derr=np.array([False,True,True,True])
        b.fit=srchmin(spars,lpars,upars,peakchisq,delstat,derr)
    else:
        b.fit=False
    return b
Code Example #54
File: images.py Project: dickwillingale/qsoft
def lecbeam(arr,s,h,blev,bvar,nt):
    """Analysis of source above background in a lobster eye cross-beam
   
    Args:
        arr:     image array
        s:       size of cross-beam square area in pixels
        h:       height of cross-arm quadrant in pixels (=2d/L)
        blev:    average background level per pixel (to be subtracted)
        bvar:    variance on blev (-ve for counting statistics)
        nt:      dimension of output quadrant flux distribution in pixels
    
    Returns:
        list of the following
    | **qua**:     quadrant surface brightness distribution array nt by nt
    | **quan**:    quadrant  pixel occupancy array nt by nt
    | **nsam**:    number of pixels in beam
    | **bflux**:   background in beam (e.g. counts)
    | **bsigma**:  standard deviation of background
    | **flux**:    source flux above background in beam (e.g. counts)
    | **fsigma**:  standard deviation of source flux
    | **peak**:    source x,y peak position
    | **cen**:     source x,y centroid position
    | **hew**:     half energy width (pixels)
    | **w90**:     W90 (90% width) (pixels)
    | **ahew**:    half energy area (sq pixels)
    | **aw90**:    W90 (90% width) area (sq pixels)
    | **fpeak**:   flux in peak pixel
    | **fit**:     results from fitting the quadrant distribution
    | **norm**:    fitted peak value
    | **G**:       fitted peak value Lorentzian width G pixels
    | **eta**:     fitted ratio of cross-arms to peak
    | **alpha**:   fitted index (1 for Lorentzian)
    |
    | The fitted parameters can also be found in fit.x
    | x[0] normalisation
    | x[1] width G pixels
    | x[2] eta brightness of cross-arms wrt spot
    | x[3] King index alpha 
    |
    | The position of the beam is the current position within the image.
    | Use function setpos() to set the current position.
    """
    a=imagesfor.qri_lecbeam(arr,s,h,blev,bvar,nt)
    b=bdata()
    b.qua=a[0]
    b.quan=a[1]
    b.nsam=a[2]
    b.bflux=a[3]
    b.bsigma=a[4]
    b.flux=a[5]
    b.fsigma=a[6]
    b.peak=a[7]
    b.cen=a[8]
    b.hew=a[9]
    b.w90=a[10]
    b.ahew=a[11]
    b.aw90=a[12]
    b.fpeak=a[13]
    def quafun(x,y,p):
        """Lobster eye quadrant using King function (modified Lorentzian)

        Args:
            x:          array of x values
            y:          array of y values
            p:          array of fitting parameters

        | par 0: normalisation (value at peak)
        | par 1: width of King profile G (not quite FWHM)
        | par 2: eta strength of cross-arm wrt central spot
        | par 3: power index alpha (1 for Lorentzian)

        Returns:
            function evaluated at x,y
        """
        eg=p[2]*p[1]/h
        x1=1/np.power(1+np.square(x*2.0/p[1]),p[3])
        x2=eg*(1-np.square(x/h))
        y1=1/np.power(1+np.square(y*2.0/p[1]),p[3])
        y2=eg*(1-np.square(y/h))
        return (x1*y1+x1*y2+x2*y1+x2*y2)*p[0]/np.square(1+eg)
    zqua=b.qua
    zqua.shape=(nt,nt)
    nqua=b.quan
    nqua.shape=(nt,nt)
    def quastat(p):
        stat=0.0
        for i in range(nt):
            xqua=i+0.5
            for j in range(nt):
                if nqua[i,j]>0:
                    yqua=j+0.5
                    mm=quafun(xqua,yqua,p)
                    stat=stat+np.square(zqua[i,j]-mm)/mm
        return stat
    delstat= chi2.isf(0.1,2)
    derr=np.array([False,False,False,False])
    spars=np.array([zqua[0,0],b.hew,1.0,1.0])
    lpars=np.array([zqua[0,0]/10,b.hew/10,0.1,0.1])
    upars=np.array([zqua[0,0]*10,b.hew*10,10.0,5.0])
    b.fit=srchmin(spars,lpars,upars,quastat,delstat,derr)
    b.norm=b.fit.x[0]
    b.G=b.fit.x[1]
    b.eta=b.fit.x[2]
    b.alpha=b.fit.x[3]
    return b
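
Across these three beam routines the delstat handed to srchmin() is a chi-square quantile at the 10% tail: chi2.isf(0.1, 4) in sqbeam and beam, and chi2.isf(0.1, 2) here, i.e. the chi-square value exceeded with 10% probability for 4 and 2 degrees of freedom respectively. The numbers involved, for reference:

# Illustrative only: the 90% delta-statistic thresholds used by these fits.
from scipy.stats import chi2

for df in (2, 4):
    print(df, chi2.isf(0.1, df))
# df=2 -> ~4.61, df=4 -> ~7.78
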