Ejemplo n.º 1
0
def chi2_dir(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    """Compare two explanations for how ``unknown`` relates to ``cause``.

    The (effect, unknown) and (cause, unknown) observations are tabulated
    with ``count``; the tables are indexed as 2x2 here (presumably binary
    variables -- confirm against ``count``).

    Parameters
    ----------
    cause, effect, unknown : iterables of paired observations.
    n : total number of observations used to scale the expected table.
    p_cause : passed to ``p_of_val`` to obtain P(cause = c).
    p_effect_given_cause : indexable by c; passed to ``p_of_val`` to obtain
        P(effect = e | cause = c).

    Returns
    -------
    struct with fields
        reject_indep : p-value of ``chi2_contingency`` on (effect, unknown).
        bayes_fwd_rev : ratio of chi2(1) densities evaluated at the forward
            and reverse test statistics.
        reject_fwd : p-value of ``chi2_contingency`` on (cause, unknown).
        reject_rev : p-value of ``chisquare`` of (cause, unknown) against the
            table expected under cause -> effect -> unknown.
    """
    effect_unknown = count(zip(effect, unknown))
    p_indep = chi2_contingency(effect_unknown)[1]

    # Empirical P(unknown = 1 | effect = e) for e = 0, 1.
    p_unknown_given_effect = [
        float(row[1]) / sum(row)
        for row in (effect_unknown[0], effect_unknown[1])
    ]

    # Expected (cause, unknown) table when the dependence runs through the
    # effect: marginalise the chain cause -> effect -> unknown over effect.
    expected = [
        [
            sum(
                n
                * p_of_val(p_cause, c)
                * p_of_val(p_effect_given_cause[c], e)
                * p_of_val(p_unknown_given_effect[e], u)
                for e in range(2)
            )
            for u in range(2)
        ]
        for c in range(2)
    ]

    cause_unknown = count(zip(cause, unknown))
    reverse_fit = chisquare(cause_unknown, expected, axis=None, ddof=2)
    forward_fit = chi2_contingency(cause_unknown)

    density_fwd = chi2.pdf(forward_fit[0], 1)
    density_rev = chi2.pdf(reverse_fit.statistic, 1)
    return struct(reject_indep=p_indep,
                  bayes_fwd_rev=density_fwd / density_rev,
                  reject_fwd=forward_fit[1],
                  reject_rev=reverse_fit.pvalue)
Ejemplo n.º 2
0
Archivo: g9.py Proyecto: ababino/efe
def ej14(data_x, data_y, s):
    """Simulate chi-squared statistics of a straight-line fit and plot them.

    A line is fitted to ``(data_x, data_y)``.  Then 1000 synthetic data sets
    are drawn from a normal distribution centred on that line with standard
    deviation ``s``.  For each one the chi-squared is computed twice: against
    the generating line, and against a line re-fitted to the synthetic data.

    Figure 1 (saved as ``fig4.jpg``) shows the first histogram with a
    chi-squared pdf of 11 degrees of freedom; figure 2 (``fig5.jpg``) shows
    the second with pdfs of 11 and 9 degrees of freedom.

    Parameters
    ----------
    data_x, data_y : arrays with the measured points.
    s : standard deviation(s) of the measurements.
    """
    a2, a1 = np.polyfit(data_x, data_y, 1)
    chi2_t_list = []
    chi2_f_list = []
    for _ in range(1000):
        new_y = np.random.normal(a1 + a2 * data_x, s)
        p = np.polyfit(data_x, new_y, 1)
        chi2_t_list.append(sum(((new_y - a1 - a2 * data_x) / s)**2))
        chi2_f_list.append(sum(((new_y - p[1] - p[0] * data_x) / s)**2))

    x = np.linspace(0, 30, 100)
    chi211 = chi2.pdf(x, 11)
    chi29 = chi2.pdf(x, 9)

    # Raw strings: "\c" is not a valid escape sequence and triggers a
    # SyntaxWarning on recent Python versions when written in a plain string.
    plt.figure(1)
    my_hist(chi2_t_list, 20, label='Datos')
    plt.plot(x, chi211, label=r'$\chi^2_{11}$', color=sns.color_palette()[2])
    plt.legend()
    plt.savefig('fig4.jpg')

    plt.figure(2)
    my_hist(chi2_f_list, 20, label='Datos')
    plt.plot(x, chi211, label=r'$\chi^2_{11}$', color=sns.color_palette()[2])
    plt.plot(x, chi29, label=r'$\chi^2_{9}$', color=sns.color_palette()[3])
    plt.legend()
    plt.savefig('fig5.jpg')
    plt.show()
Ejemplo n.º 3
0
def prob1():
    """Compare Monte Carlo estimates for a squared normal with chi2(1).

    Part A draws 10**2, 10**4 and 10**6 standard-normal samples, squares
    them, histograms them on [0, 1] and estimates the empirical CDF at
    0.5, 1 and 1.5 together with the sample mean and variance.
    Part B overlays the chi2(1) pdf on each histogram and evaluates the
    chi2(1) CDF, mean and variance with scipy.

    Returns
    -------
    str
        A text report concatenating all of the collected results.
    """
    thresholds = [.5, 1, 1.5]
    panels = (131, 132, 133)

    # Part A using Monte Carlo methods.  The samples have different lengths,
    # so they are kept in a plain list: building a ragged ``np.array`` raises
    # ValueError on NumPy >= 1.24.
    A = [np.random.randn(10**i)**2 for i in [2, 4, 6]]
    edges = np.linspace(0, 1, 50)
    for panel, samples in zip(panels, A):
        plt.subplot(panel)
        plt.hist(samples, bins=edges, density=True)

    # Empirical CDF values and sample moments.
    answers_A1 = []
    answers_A2 = []
    answers_A3 = []
    for j, samples in enumerate(A):
        length = len(samples)
        for k in thresholds:
            answers_A1.append((np.sum(samples <= k) / length, k, (j + 1) * 2))
        # The moments do not depend on the threshold, so compute them once.
        mean = np.average(samples)
        answers_A2.append(mean)
        answers_A3.append(np.sum((samples - mean)**2) / length)

    # Part B: built-in functions.
    answers_B2 = []
    answers_B3 = []
    for panel in panels:
        plt.subplot(panel)
        plt.plot(edges, chi2.pdf(edges, 1))
    answers_B1 = [chi2.cdf(i, df=1) for i in thresholds]
    plt.show()
    answers_B2.append(chi2.mean(1))
    answers_B3.append(chi2.var(1))
    return "for k = 2, 4, 6, x= 0.5, 1.0, 1.5, the expected value is the first term, x is the second term, k is the third term" + str(
        answers_A1
    ) + " These are the standard erros for each expected value " + str(
        answers_A2) + str(answers_A3) + str(
            answers_B1) + " mean for chi_squared " + str(answers_B2) + str(
                answers_B3)
Ejemplo n.º 4
0
def calculate_bartlett_sphericity(x):
    """
    Test the hypothesis that the correlation matrix
    is equal to the identity matrix.

    H0: The matrix of population correlations is equal to I.
    H1: The matrix of population correlations is not equal to I.

    The formula for Bartlett's Sphericity test is:

    .. math:: -1 * (n - 1 - ((2p + 5) / 6)) * ln(det(R))

    Where det(R) is the determinant of the correlation matrix R,
    n is the number of observations and p is the number of variables.
    The statistic is compared against a chi-square distribution with
    p * (p - 1) / 2 degrees of freedom.

    Parameters
    ----------
    x : array-like
        The array from which to calculate sphericity; its ``shape`` must
        be ``(n, p)``.

    Returns
    -------
    statistic : float
        The chi-square value.
    p_value : float
        The associated p-value for the test (upper-tail probability).
    """
    n, p = x.shape
    x_corr = corr(x)

    corr_det = np.linalg.det(x_corr)
    statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6)
    degrees_of_freedom = p * (p - 1) / 2
    # The p-value is the survival function P(X >= statistic); the density
    # at the statistic is not a probability.
    p_value = chi2.sf(statistic, degrees_of_freedom)
    return statistic, p_value
Ejemplo n.º 5
0
def plot_ts_vs_chi2(data, ext_list="ext1_ts", ndf_chi2=[1], subplot=[1, 2, 1], **kwargs):
    """Plot a TS histogram against chi-squared curves in two panels.

    The first panel shows the normalised histogram of the selected column
    with chi-squared pdfs; the next panel shows the reversed cumulative
    histogram with chi-squared survival functions.  Both use a log y-axis.

    Parameters
    ----------
    data : table passed to ``column`` to extract the values.
    ext_list : name of the column to plot; its first four characters are
        used in the x-axis label.
    ndf_chi2 : degrees of freedom, one curve per entry (never mutated here).
    subplot : ``[nrows, ncols, index]`` of the first panel; the second panel
        uses ``index + 1`` (never mutated here).
    **kwargs : accepted but not used.
    """
    ext_data = column(data, "%s" % ext_list)
    clean_data = [x for x in ext_data if not math.isnan(x)]  # remove nan from data
    # Take the bin count from the cleaned values: max() over data that still
    # contains NaN is order-dependent and can itself be NaN.
    n_bins = int(math.ceil(max(clean_data)))
    colors = ["r", "b", "g"]

    ax = plt.subplot(subplot[0], subplot[1], subplot[2])
    # ``density`` replaces the ``normed`` keyword removed from matplotlib.
    n, bins, patches = plt.hist(clean_data, n_bins, density=True, facecolor="green")
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    for j, ndf in enumerate(ndf_chi2):
        # Cycle through the colours so more than three curves do not fail.
        style = "%s--" % colors[j % len(colors)]
        plt.plot(bincenters, chi2.pdf(bincenters, ndf), style, linewidth=2.0,
                 label=r"$\chi^2_%i$/2" % ndf)
    ax.legend(loc="upper right", frameon=False)
    plt.ylabel("PDF")
    plt.xlabel("TS$_{%s}$" % ext_list[0:4])
    plt.yscale("log")
    plt.ylim([0.00001, 2.0])

    ax = plt.subplot(subplot[0], subplot[1], subplot[2] + 1)
    plt.hist(clean_data, n_bins, density=True, facecolor="green", cumulative=-1)
    # The curves are evaluated at the bin centres of the first histogram.
    for j, ndf in enumerate(ndf_chi2):
        style = "%s--" % colors[j % len(colors)]
        plt.plot(bincenters, chi2.sf(bincenters, ndf), style, linewidth=2.0,
                 label=r"$\chi^2_%i$/2" % ndf)
    ax.legend(loc="upper right", frameon=False)
    plt.ylabel("1-CDF")
    plt.xlabel("TS$_{%s}$" % ext_list[0:4])
    plt.yscale("log")
    plt.ylim([0.00001, 2.0])
def Chi2Plots(instance, Nbins):
    """
    Plot the distribution of ``lambda_true - lambda_toys`` obtained from the
    MC toy experiments together with a chi^2 pdf, save it and show it.

    params  :
              instance - object providing ``lambda_true``, ``lambda_toys``,
                         ``obs`` and ``mode``; the last two select the
                         output sub-directory.
              Nbins - int specifying the number of bins, i.e no. of dof.
                      NOTE(review): currently unused; the reference curve is
                      drawn with a fixed 8 degrees of freedom -- confirm
                      whether it should follow Nbins.
    returns : None
    """
    binning = np.linspace(0, 30, 20)
    # ``density`` replaces the ``normed`` keyword removed from matplotlib.
    plt.hist(instance.lambda_true - instance.lambda_toys,
             density=True,
             bins=binning,
             label='toy MC')
    x = np.linspace(0, 30, 1000)
    plt.plot(x, chi2.pdf(x, df=8), label=r'$\chi^2$ dof$=8$', lw=3, color='r')

    plt.legend(loc='best')

    pathToChi2Toys = '/mnt/t3nfs01/data01/shome/jandrejk/higgs_model_dep/MoriondAnalysis/plots/chi2_DistFromToys/'
    pathSave = pathToChi2Toys + '/' + instance.obs + '/' + instance.mode + '/'
    ensure_dir(file_path=pathSave)
    plt.savefig(pathSave + 'VHchi2.png')
    plt.show()
Ejemplo n.º 7
0
def likelihoodRatio(idx, p, params1, params2, n=100):
    """Return the p-value of a likelihood-ratio test between two parameter sets.

    The log-likelihood of the rounded data ``p`` is evaluated with
    ``logLikelihood`` for ``params1`` and ``params2``.  The statistic
    ``-2 * (ll1 - ll2)`` is compared against a chi-squared distribution
    with 3 degrees of freedom.

    Parameters
    ----------
    idx : identifier forwarded to ``logLikelihood`` and to the status log.
    p : data values; rounded and converted to ints before use.
    params1, params2 : parameter objects forwarded to ``logLikelihood``.
    n : forwarded to ``logLikelihood``.

    Returns
    -------
    float
        Upper-tail chi-squared probability of the statistic, or NaN (after
        logging a status) when the first log-likelihood is not smaller than
        the second.
    """
    counts = [int(x) for x in round(p)]

    # Each call gets its own copy so neither can affect the other's input.
    sum1 = logLikelihood(list(counts), params1, n, idx)
    sum2 = logLikelihood(list(counts), params2, n, idx)

    ratio = sum1 - sum2

    if ratio >= 0:
        logStatus(Status(1, idx, "Could not perform likelihood ratio test."))
        return float('nan')

    # A p-value is a tail probability, so use the survival function rather
    # than the density at the statistic.
    pVal = chi2.sf(-2 * ratio, 3)

    return pVal
Ejemplo n.º 8
0
 def calculate_daughter(self, x):
     """
     Calculate the daughter probability for amplitude ``x``.

     The amplitude is divided by this object's emittance and the result is
     evaluated with the chi-squared pdf for this object's number of
     degrees of freedom.
     """
     # An alternative scaling, x * emittance / parent_emittance**2, is
     # deliberately not applied here.
     scaled_amplitude = x / self.__emittance
     n_dof = self.__ndf
     return chi2.pdf(scaled_amplitude, n_dof)
Ejemplo n.º 9
0
    def plot_A0(self):
        """Histogram the shifted A0 differences for each entry of ``self.fgnls``.

        For every index ``i`` the quantity ``fgNLA0[i] - gA0`` is offset by
        ``9*6*fgnls[i]*phisq`` and passed to ``plot_hist``.  When
        ``self.theoryplot`` is truthy, a signed chi-squared (1 dof) pdf with
        scale ``6*A0const*Nconst*fgnls[i]`` is drawn as well and carries the
        legend label; otherwise the histogram carries it.  The axes are then
        labelled, set to a log y-scale and limited to fixed ranges.
        """
        # Gaussian reference quantities; evaluated here but not drawn.
        gauss_sigma=np.sqrt(np.var(self.gA0))
        gauss_mean=np.mean(self.gA0)
        lo, hi=plt.xlim()
        grid=np.arange(3*lo, hi*3, hi/250.)
        gauss_theory=norm.pdf(grid, loc=gauss_mean, scale=gauss_sigma)

        for i, fnl in enumerate(self.fgnls):
            lbl=self.TYPELABEL+"$=$"+NtoSTR(fnl) if self.theoryplot==False else None

            delta=self.fgNLA0[i]-self.gA0
            smallest=np.min(delta)  # evaluated but not used afterwards
            const=9*6.*fnl*self.phisq
            plot_hist(plt, (delta+const), clr=self.clrs[i+1], alp=ALPHA, labl=lbl, ht='stepfilled')

            # Re-read the limits: the histogram may have changed them.
            lo, hi=plt.xlim()
            grid=np.arange(3*lo, hi*3, 0.001)
            if self.theoryplot:
                scl=6.0*self.A0const*self.Nconst*fnl
                theory_curve=np.sign(fnl)*chi2.pdf(grid,1, scale=scl)
                plt.plot(grid, theory_curve, self.clrs[i+1], linestyle=self.ls[i+1], linewidth=LW, label=self.TYPELABEL+"="+NtoSTR(fnl))

        plt.xlabel(r'$A_0$')
        plt.ylabel(r'$p(A_0)$')
        plt.yscale('log')

        plt.ylim(0.01, 100)
        plt.xlim(-0.02, 0.25)

        plt.legend()
Ejemplo n.º 10
0
def chi2_2sample(s1, s2, bins=20, range=(0,1)):
    """
    Chi square two sample test
    see: http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm

    Both samples are histogrammed with the same binning.  The number of bins
    is reduced one at a time until every bin of both histograms holds at
    least 5 entries.  With R and S the two histograms, the statistic is

        sum_i (K_R * R_i - K_S * S_i)**2 / (R_i + S_i)

    where K_R = sqrt(sum(S) / sum(R)) and K_S = sqrt(sum(R) / sum(S))
    compensate for unequal sample sizes.

    @param s1, s2  the two samples
    @param bins    initial (maximum) number of bins
    @param range   (low, high) histogram range; values outside are ignored
    @return chi-square-statistic, degrees of freedom (bins - 1), p-value
    @raise ValueError if no binning gives at least 5 entries per bin
    """
    while True:
        if bins < 1:
            raise ValueError(
                'chi2_2sample needs at least 5 samples per bin in both samples')
        R, _e = numpy.histogram(s1, bins=bins, range=range)
        S, _e = numpy.histogram(s2, bins=bins, range=range)
        if min(R) >= 5 and min(S) >= 5:
            break
        bins -= 1
        logging.debug('decreased bin count to %d for X2 test: less than 5 samples in a bin', bins)

    total_r = float(numpy.sum(R))
    total_s = float(numpy.sum(S))
    # Each histogram gets its own scale factor; using the same factor for
    # both is only correct when the two totals happen to be equal.
    scale_r = math.sqrt(total_s / total_r)
    scale_s = math.sqrt(total_r / total_s)

    # Every bin holds at least 10 entries in total, so the division is safe.
    x2 = numpy.sum((scale_r * R - scale_s * S) ** 2 / (R + S))

    df = bins - 1
    # The p-value is the upper-tail probability, not the density.
    pvalue = chi2.sf(x2, df)
    return x2, df, pvalue
Ejemplo n.º 11
0
    def CombinedPvalue(self,output):
        """
        Transform the combined statistic ``output`` according to
        ``self.method`` and return the result.

        Side effects: ``self.n`` is set to ``len(self.N)`` and the raw
        statistic is stored in ``self.output``.

        NOTE(review): the 'Tippett' branch uses a closed form and 'Pearson'
        a CDF, while every other branch evaluates a density (``pdf``) --
        confirm that densities are really what callers expect.
        """
        self.n = len(self.N)
        method = self.method
        # Every branch records the untransformed statistic first.
        self.output = output

        if method == 'Tippett':
            # ST is Beta(1,n)
            return 1-(1-output)**self.n
        if method == 'Stouffer':
            # SS is N(0,n)
            return norm.pdf(output,scale = self.n)
        if method == 'George':
            # SG is Student t distribution (n)
            return t.pdf(output,self.n)
        if method == 'Ed':
            # SE is Gamma(x,n)
            return gamma.pdf(output,a = self.n)
        if method == 'Pearson':
            # SP is Chi-square df=2n
            return chi2.cdf(output,2*self.n)
        # Any other method name: SF is Chi-square df=2n
        return chi2.pdf(output,2*self.n)
Ejemplo n.º 12
0
def plot_counts(data, xlabel=None, title=None, save=False):
    """Draw error-bar series with a chi-squared (1 dof) reference curve.

    Parameters
    ----------
    data : iterable of dicts with keys 'x', 'y', 'xerr', 'yerr', 'color',
        'marker' and 'label'; one error-bar series is drawn per entry.
    xlabel : optional x-axis label.
    title : optional axes title.
    save : if truthy, used as the path passed to ``plt.savefig``; otherwise
        the figure is shown interactively.
    """
    fig, ax = plt.subplots(1, 1, tight_layout=True)
    last_series = None
    for p in data:
        ax.errorbar(x=p['x'],
                    y=p['y'],
                    xerr=p['xerr'],
                    yerr=p['yerr'],
                    linestyle='',
                    color=p['color'],
                    marker=p['marker'],
                    label=p['label'],
                    alpha=0.8)
        last_series = p
    # The reference curve is evaluated on the x values of the last series;
    # skip it when there is no series at all instead of failing on an
    # undefined loop variable.
    if last_series is not None:
        ax.plot(last_series['x'],
                chi2.pdf(last_series['x'], df=1),
                label='Χ²',
                linestyle='-.',
                color='black',
                alpha=0.6)
    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    ax.set_yscale('log')
    ax.set_ylabel('counts (normalized)')
    ax.legend(loc='best')
    if save:
        plt.savefig(save)
        log.info(f'ts distribution plot saved in {save}')
    else:
        plt.show()
def get_calibration(distribution, realizations, b=10, plot=False):
    """
    Calibration score of realizations against a normal forecast.

    The forecast distribution is split into ``b`` equal-probability bins
    whose edges are estimated from 1000 random draws.  The share of
    realizations per bin is compared with the nominal ``1 / b`` through the
    statistic ``2 * N * sum(c_i * log(c_i / p_i))``, and the score is the
    upper-tail probability of that statistic under a chi-squared
    distribution with ``b - 1`` degrees of freedom.

    Parameters
    ----------
    distribution : (mean, std) of the normal forecast.
    realizations : float list of length T with the observed values.
    b : number of bins.
    plot : if truthy, show the standard normal pdf with the bin edges.

    Returns
    -------
    float
        Score in [0, 1]; small values indicate poor calibration.
    """
    mean, std = distribution
    rvf = norm.rvs(size=1000, loc=mean, scale=std)
    # Sort the right edges before use: a set has no defined order, so
    # dropping its "last" element could remove an arbitrary edge instead of
    # the outermost one.
    right_edges = sorted({interval.right for interval in qcut(rvf, b).to_numpy()})
    if plot:
        grid = np.linspace(-5, 5, 100)
        plt.plot(grid, norm.pdf(grid))
        plt.scatter(right_edges, [norm.pdf(edge) for edge in right_edges])
        plt.show()
    # Replace the outermost sample edges with the fixed bounds -5 and 5.
    edges = sorted([-5] + right_edges[:-1] + [5])

    p_content = [1 / b for _ in range(b)]
    counts, _ = np.histogram(realizations, edges)
    counts = counts / np.sum(counts)

    # Empty bins contribute nothing (log(1) == 0).
    right_tail = 2 * len(realizations) * np.sum([
        counts[i] * np.log(counts[i] / p_content[i] if counts[i] != 0 else 1)
        for i in range(len(p_content))
    ])

    # 1 - CDF, i.e. the survival function; the density is not a probability.
    C = chi2.sf(right_tail, df=b - 1)

    return C
Ejemplo n.º 14
0
 def BackTesting(self, M):
     """Run Kupiec's proportion-of-failures test for every level in ``self.alphas``.

     ``self.exceptions(M)`` must return a table; its first column is
     skipped.  For each remaining column the column sum is the number of
     exceptions ``x`` and the non-null count is the number of observations
     ``m``.  The likelihood-ratio statistic
     ``-2 * log(p**x (1-p)**(m-x) / (p_est**x (1-p_est)**(m-x)))`` is
     compared against a chi-squared distribution with 1 degree of freedom
     at the 5% level.

     Returns
     -------
     pandas.DataFrame
         Transposed table with one column per level (``"p=<alpha>"``) and
         one row per reported quantity.
     """
     ex = self.exceptions(M)
     p = np.array(self.alphas)
     x = ex.sum()[1:].values
     m = ex.count()[1:].values
     p_est = x / m
     level = np.array([0.05] * len(self.alphas))
     expected_exceptions = m * p
     num = (p**x) * (1 - p)**(m - x)
     den = (p_est**x) * (1 - p_est)**(m - x)
     test_kupiec = -2 * np.log(list(num / den))
     # The p-value is the upper-tail probability of the statistic; the
     # density at the statistic is not a probability.
     Pvalue = chi2.sf(test_kupiec, 1)
     zone = np.where(Pvalue <= level, "Rechazo H0", "No Rechazo H0")
     #**********************************************************************
     df = pd.DataFrame(
         {
             'x': x,
             'm': m,
             'p estimado': p_est,
             'nivel de significancia': level,
             'test de kupiec': test_kupiec,
             'Valor P': Pvalue,
             "Zona de rechazo": zone,
             'BackTesting': expected_exceptions,
             "Valor de eficiencia": 1 - p_est
         },
         index=["p=" + str(pp) for pp in p])
     return df.T
Ejemplo n.º 15
0
def visualize_pruning(
        w_norm,
        n_retained,
        title='Initial model weights vs theoretical for pruning'):
    """Compare a histogram of weight norms with a chi-squared distribution.

    The left axis shows the normalised histogram of ``w_norm`` (restricted
    to ``[0, 5 * n_retained]``), a vertical line at ``n_retained`` and the
    chi-squared pdf with ``n_retained`` degrees of freedom.  A twin right
    axis shows the matching survival function.

    Parameters
    ----------
    w_norm : values to histogram.
    n_retained : degrees of freedom of the reference distribution.
    title : title of the plot.
    """
    fig, ax1 = plt.subplots()
    ax1.set_title(title)
    # ``density`` replaces the ``normed`` keyword removed from matplotlib.
    ax1.hist(w_norm,
             density=True,
             bins=200,
             alpha=0.6,
             histtype='stepfilled',
             range=[0, n_retained * 5])
    ax1.axvline(x=n_retained, linewidth=1, color='r')
    ax1.set_ylabel('PDF', color='b')

    ax2 = ax1.twinx()
    ax2.set_ylabel('Survival Function', color='r')

    ax1.set_xlabel('w_norm')

    x = np.linspace(chi2.ppf(0.001, n_retained), chi2.ppf(0.999, n_retained),
                    100)
    # This curve is the survival function, so label it as such.
    ax2.plot(x,
             chi2.sf(x, n_retained),
             'g-',
             lw=1,
             alpha=0.6,
             label='chi2 sf')
    ax1.plot(x,
             chi2.pdf(x, n_retained),
             'r-',
             lw=1,
             alpha=0.6,
             label='chi2 pdf')
Ejemplo n.º 16
0
    def plot_A0(self):
        """Histogram the rescaled A0 differences for each entry of ``self.fgnls``.

        For every index ``i`` the quantity ``fgNLA0[i] - gA0`` is offset by
        ``9*6*fgnls[i]*phisq``, divided by ``1 - 9*3*fgnls[i]*phisq`` and
        passed to ``plot_hist``.  When ``self.theoryplot`` is truthy, a
        signed chi-squared (1 dof) pdf with scale
        ``6*A0const*Nconst*fgnls[i]`` is drawn as well and carries the legend
        label; otherwise the histogram carries it.  The axes are then
        labelled, set to a log y-scale and limited to fixed ranges.
        """
        # Gaussian reference quantities; evaluated here but not drawn.
        gauss_sigma=np.sqrt(np.var(self.gA0))
        gauss_mean=np.mean(self.gA0)
        lo, hi=plt.xlim()
        grid=np.arange(3*lo, hi*3, hi/250.)
        gauss_theory=norm.pdf(grid, loc=gauss_mean, scale=gauss_sigma)

        for i, fnl in enumerate(self.fgnls):
            lbl=self.TYPELABEL+"$=$"+NtoSTR(fnl) if self.theoryplot==False else None

            delta=self.fgNLA0[i]-self.gA0
            smallest=np.min(delta)  # evaluated but not used afterwards
            const=9*6.*fnl*self.phisq
            plot_hist(plt, (delta+const)/(1-9.*3.*fnl*self.phisq), clr=self.clrs[i+1], alp=ALPHA, labl=lbl, ht='stepfilled')

            # Re-read the limits: the histogram may have changed them.
            lo, hi=plt.xlim()
            grid=np.arange(3*lo, hi*3, 0.001)
            if self.theoryplot:
                scl=6.0*self.A0const*self.Nconst*fnl
                theory_curve=np.sign(fnl)*chi2.pdf(grid,1, scale=scl)
                plt.plot(grid, theory_curve, self.clrs[i+1], linestyle=self.ls[i+1], linewidth=LW, label=self.TYPELABEL+"="+NtoSTR(fnl))

        plt.xlabel(r'$A_0$')
        plt.ylabel(r'$p(A_0)$')
        plt.yscale('log')

        plt.ylim(0.01, 100)
        plt.xlim(-0.02, 0.25)

        plt.legend()
Ejemplo n.º 17
0
    def get_chi2_text(self, a1):
        """Compute a Pearson chi-squared goodness of fit for the best-fit model.

        For every spectrum in ``self.spectrumlist`` the model is evaluated at
        the best-fit parameters of ``a1``, converted with ``self.get_A`` and
        ``spec.transform``, optionally cut to ``spec.effective_index``, and
        grouped into the channel sets returned by ``self.chi2_check``.  The
        contributions ``(observed - model)**2 / model`` of all groups are
        summed over all spectra.

        Parameters
        ----------
        a1 : fit result providing ``get_best_fit()['parameters']``.

        Returns
        -------
        dict
            ``'chi2'``: the summed statistic; ``'df'``: number of groups
            minus number of parameters minus one; ``'p'``: upper-tail
            probability of the statistic for that many degrees of freedom.
        """
        best_value = a1.get_best_fit()['parameters']
        n_params = len(best_value)
        n_groups = 0
        chi2_value = 0
        for spec in self.spectrumlist:
            sp = spec.spectrum
            rate = self.model(spec.e_add, best_value)
            spec1 = self.get_A(rate, spec.e_lo, spec.e_hi, spec.e_add_num)
            model_sp = spec.transform(spec1)
            effinde = spec.effective_index
            if effinde is not None:
                model_sp = model_sp[effinde[0]:effinde[-1]]
                sp = sp[effinde[0]:effinde[-1]]

            # Sum model and data over each group of channels.
            index_list = self.chi2_check(model_sp)
            new_model_sp = np.array([np.sum(model_sp[inde_i]) for inde_i in index_list])
            new_sp = np.array([np.sum(sp[inde_i]) for inde_i in index_list])
            chi2_s = (new_sp - new_model_sp)**2 / new_model_sp
            n_groups += len(chi2_s)
            chi2_value = chi2_value + chi2_s.sum()

        df = n_groups - n_params - 1
        # The p-value is the survival function of the statistic, not the
        # density evaluated at it.
        p = chi2.sf(chi2_value, df)
        return {'p': p, 'df': df, 'chi2': chi2_value}
Ejemplo n.º 18
0
def chisquareMC(df):
    """Draw one value by rejection sampling under the chi-squared pdf.

    A candidate is drawn uniformly from [0, 10) and a height uniformly from
    [0, 1).  The candidate is returned when the chi-squared pdf with ``df``
    degrees of freedom at the candidate is at least the height; otherwise
    the function calls itself to try again.
    """
    candidate = np.random.random() * 10
    height = np.random.random() * 1
    accepted = chi2.pdf(candidate, df) >= height
    return candidate if accepted else chisquareMC(df)
Ejemplo n.º 19
0
    def test_inv_chi2_pdf(self):
        """
        Check ``inv_chi2`` against a value computed with Octave.

        Octave reference:
        >> format long
        >> chi2pdf(1.5^-1, 4.0)
           ans =  0.119421885095632

        Other values quoted for the inverse-chi-square density at (1.5, 4),
        kept for reference only and not asserted:

        Inverse-chi-square distribution calculator
        http://keisan.casio.com/exec/system/1304908316
        gives (1.5, 4) -> 0.0530763933

        R
        > library(geoR)
        > dinvchisq(1.5, df=4)
        > ans=0.05307639

        """
        x_value, dof = 1.5, 4.0
        octave_chi2pdf = 0.119421885095632
        # Quoted reference values; not used in any assertion.
        online_calculator = 0.0530763933
        r_value = 0.05307639

        my_func = inv_chi2(x_value, dof)

        # Printed for information only.  The third positional argument of
        # scipy's ``pdf`` is ``loc``.
        using_inv_gamma = invgamma.pdf(x_value, dof / 2.0, 0.5)
        print("... Value obtained using inv-gamma :", using_inv_gamma)

        scipy_chi2pdf = chi2.pdf(x_value**-1, dof)
        print("... Value obtained using chi2 :", scipy_chi2pdf)

        self.assertAlmostEqual(octave_chi2pdf, scipy_chi2pdf, places=5)
        self.assertAlmostEqual(my_func, octave_chi2pdf, places=5)
Ejemplo n.º 20
0
        def loglambda_plot(values, name):
            """Plot twice the stored values for theta = 0.7 against a chi2(1) pdf.

            Parameters
            ----------
            values : mapping from theta to a sequence of numbers; only the
                entry for ``0.7`` is plotted for now.
            name : window title of the figure.

            The figure is shown and saved as
            ``loglambda_chisquare_mle_nn.pdf``.
            """
            rcParams['xtick.major.pad'] = 12
            rcParams['ytick.major.pad'] = 12

            # For now plot only theta = 0.7
            fig = plt.figure()
            # ``density`` replaces the ``normed`` keyword removed from
            # matplotlib.
            plt.hist(np.array(values[0.7]) * 2.0,
                     density=True,
                     bins=75,
                     range=(0, 6.5),
                     color='#20A387')

            xvals = np.linspace(0, 6.5, 300)
            plt.plot(xvals,
                     chi2.pdf(xvals, df=1.0),
                     color='#440154',
                     alpha=0.8,
                     label=r'$\chi^{2}$, n.d.f = 1')

            plt.xlabel(r'$-2 \ln \frac{L(\alpha = 0.7)}{L(\hat{\alpha})}$')
            plt.ylabel('Probability density / %.2f' % (6.5 / 75))
            # The window title is set through the canvas manager; the
            # ``canvas.set_window_title`` shortcut was removed from
            # matplotlib.
            fig.canvas.manager.set_window_title(name)
            plt.legend(loc='upper right')
            plt.xlim([0, 6.5])
            plt.subplots_adjust(left=0.15, right=0.97, top=0.97, bottom=0.17)
            fig.show()
            fig.savefig('loglambda_chisquare_mle_nn.pdf')
Ejemplo n.º 21
0
  def calculate_daughter(self, x) :
    """
      Calculate the daughter probability for amplitude ``x``.

      The amplitude is divided by this object's emittance and the result is
      evaluated with the chi-squared pdf for this object's number of
      degrees of freedom.
    """
    # An alternative scaling, x * emittance / parent_emittance**2, is
    # deliberately not applied here.
    scaled_amplitude = x/self.__emittance
    n_dof = self.__ndf
    return chi2.pdf(scaled_amplitude, n_dof)
Ejemplo n.º 22
0
def sdist(x, sigma, n=10):
    '''
    Evaluate the pdf of the sampling distribution of a standard deviation.

    The density is obtained by a change of variables from a chi-squared
    distribution with ``n`` degrees of freedom.  With ``nb = n + 1``:

        t(S)  = nb * S**2 / sigma**2        (transformed variable)
        t'(S) = 2 * nb * S / sigma**2       (its derivative)
        pdf(S) = chi2.pdf(t(S), n) * t'(S)

    Parameters:
        x     -- standard deviation value(s) at which to evaluate the pdf
        sigma -- standard deviation of the original distribution
        n     -- degrees of freedom of the chi-squared distribution

    See http://math.arizona.edu/~jwatkins/f-transform.pdf
    '''
    sample_count = n + 1
    variance = sigma**2
    transformed = (sample_count * x**2) / variance
    jacobian = (2 * sample_count * x) / variance
    return chi2.pdf(transformed, n) * jacobian
Ejemplo n.º 23
0
def art_qi2(img, airmask, min_voxels=int(1e3), max_voxels=int(3e5), save_plot=True):
    r"""
    Calculates :math:`\text{QI}_2`, based on the goodness-of-fit of a centered
    :math:`\chi^2` distribution onto the intensity distribution of
    non-artifactual background (within the "hat" mask):


    .. math ::

        \chi^2_n = \frac{2}{(\sigma \sqrt{2})^{2n} \, (n - 1)!}x^{2n - 1}\, e^{-\frac{x}{2}}

    where :math:`n` is the number of coil elements.

    The value returned is the mean absolute difference between a kernel
    density estimate of the background intensities and the fitted
    :math:`\chi^2` pdf over the upper part of the intensity grid.

    :param numpy.ndarray img: input data
    :param numpy.ndarray airmask: input air mask without artifacts
    :param int min_voxels: below this many positive background voxels the
        function returns ``0.0`` without fitting
    :param int max_voxels: from this many voxels on, a random subsample of
        this size is used for the fit
    :param bool save_plot: whether to produce the plot with ``plot_qi2``
    :return: ``(gof, out_file)`` -- the goodness of fit and the path of the
        plot, or of a placeholder ``error.svg`` when no plot was produced

    """

    from sklearn.neighbors import KernelDensity
    from scipy.stats import chi2
    from mriqc.viz.misc import plot_qi2

    # S. Ogawa was born
    np.random.seed(1191935)

    # Strictly positive intensities inside the air mask.
    background = img[airmask > 0]
    background = background[background > 0]

    # Placeholder figure, replaced below when the fit can be plotted.
    out_file = op.abspath('error.svg')
    with open(out_file, 'w') as ofh:
        ofh.write('<p>Background noise fitting could not be plotted.</p>')

    n_voxels = len(background)
    if n_voxels < min_voxels:
        return 0.0, out_file

    if n_voxels < max_voxels:
        modelx = background
    else:
        modelx = np.random.choice(background, size=max_voxels)

    x_grid = np.linspace(0.0, np.percentile(background, 99), 1000)

    # Estimate data pdf with KDE on a random subsample
    bandwidth = 0.05 * np.percentile(background, 98)
    estimator = KernelDensity(bandwidth=bandwidth, kernel='gaussian')
    estimator.fit(modelx[:, np.newaxis])
    kde = np.exp(estimator.score_samples(x_grid[:, np.newaxis]))

    # Find cutoff: offset from the end of the grid of the last point whose
    # density exceeds half of the maximum.
    kdethi = np.argmax(kde[::-1] > kde.max() * 0.5)

    # Fit X^2
    fit_sample = modelx[modelx < np.percentile(background, 95)]
    param = chi2.fit(fit_sample, 32)
    shape_args, loc, scale = param[:-2], param[-2], param[-1]
    chi_pdf = chi2.pdf(x_grid, *shape_args, loc=loc, scale=scale)

    # Compute goodness-of-fit (gof) over the last ``kdethi`` grid points
    # (the whole grid when ``kdethi`` is 0).
    tail = slice(-kdethi, None)
    gof = float(np.abs(kde[tail] - chi_pdf[tail]).mean())
    if save_plot:
        out_file = plot_qi2(x_grid, kde, chi_pdf, modelx, kdethi)

    return gof, out_file
Ejemplo n.º 24
0
def differential_lrt(x, y, xmin=0):
    """Likelihood-ratio test for a difference between two samples.

    ``bimod_likelihood`` is evaluated on ``x``, on ``y`` and on their
    concatenation; twice the gain of the separate fits over the pooled fit
    is compared against a chi-squared distribution with 3 degrees of
    freedom.

    Parameters
    ----------
    x, y : arrays with the two samples.
    xmin : accepted but not used.

    Returns
    -------
    float
        The p-value (upper-tail probability) of the statistic.
    """
    from scipy.stats import chi2
    lrtX = bimod_likelihood(x)
    lrtY = bimod_likelihood(y)
    lrtZ = bimod_likelihood(np.concatenate((x, y)))
    lrt_diff = 2 * (lrtX + lrtY - lrtZ)

    # A p-value is a tail probability, so use the survival function rather
    # than the density at the statistic.
    return chi2.sf(lrt_diff, df=3)
Ejemplo n.º 25
0
 def chi2(emission_index=0, t_horizon=100, tstep=.1):
     """
     Return a chi-squared decay profile sampled every ``tstep`` up to ``t_horizon``.

     The degrees of freedom are ``emission_index + 2``, the k parameter
     ``t + 2`` as done in Cherubini (doi: 10.1111/j.1757-1707.2011.01156.x).
     Each pdf value is multiplied by ``tstep``.

     Returns a numpy array with ``round(t_horizon / tstep) + 1`` entries.
     """
     # This function is itself called ``chi2``, so a module-level
     # ``from scipy.stats import chi2`` would be hidden by it; import the
     # distribution under a distinct local name.
     from scipy.stats import chi2 as chi2_distribution

     # ``np.linspace`` requires an integer number of samples.
     n_samples = int(round(t_horizon / tstep)) + 1
     yrs = np.linspace(0, t_horizon, n_samples)
     decay = chi2_distribution.pdf(yrs, emission_index + 2)
     decay = decay * tstep
     return decay
Ejemplo n.º 26
0
def art_qi2(img,
            airmask,
            min_voxels=int(1e3),
            max_voxels=int(3e5),
            save_plot=True):
    r"""
    Calculates :math:`\text{QI}_2`, based on the goodness-of-fit of a centered
    :math:`\chi^2` distribution onto the intensity distribution of
    non-artifactual background (within the "hat" mask):

    .. math ::

        \chi^2_n = \frac{2}{(\sigma \sqrt{2})^{2n} \, (n - 1)!}x^{2n - 1}\, e^{-\frac{x}{2}}

    where :math:`n` is the number of coil elements.

    The value returned is the mean absolute difference between a kernel
    density estimate of the background intensities and the fitted
    :math:`\chi^2` pdf over the upper part of the intensity grid.

    :param numpy.ndarray img: input data
    :param numpy.ndarray airmask: input air mask without artifacts
    :param int min_voxels: below this many positive background voxels the
        function returns ``0.0`` without fitting
    :param int max_voxels: from this many voxels on, a random subsample of
        this size is used for the fit
    :param bool save_plot: whether to produce the plot with ``plot_qi2``
    :return: ``(gof, out_file)`` -- the goodness of fit and the path of the
        plot, or of a placeholder ``error.svg`` when no plot was produced
    """

    from sklearn.neighbors import KernelDensity
    from scipy.stats import chi2
    from mriqc.viz.misc import plot_qi2

    # S. Ogawa was born
    np.random.seed(1191935)

    # Strictly positive intensities inside the air mask.
    air_values = img[airmask > 0]
    air_values = air_values[air_values > 0]

    # Placeholder figure, replaced below when the fit can be plotted.
    out_file = op.abspath('error.svg')
    with open(out_file, 'w') as ofh:
        ofh.write('<p>Background noise fitting could not be plotted.</p>')

    voxel_count = len(air_values)
    if voxel_count < min_voxels:
        return 0.0, out_file

    if voxel_count < max_voxels:
        modelx = air_values
    else:
        modelx = np.random.choice(air_values, size=max_voxels)

    x_grid = np.linspace(0.0, np.percentile(air_values, 99), 1000)

    # Estimate data pdf with KDE on a random subsample
    kde_bandwidth = 0.05 * np.percentile(air_values, 98)
    density_model = KernelDensity(bandwidth=kde_bandwidth, kernel='gaussian')
    density_model.fit(modelx[:, np.newaxis])
    kde = np.exp(density_model.score_samples(x_grid[:, np.newaxis]))

    # Find cutoff: offset from the end of the grid of the last point whose
    # density exceeds half of the maximum.
    kdethi = np.argmax(kde[::-1] > kde.max() * 0.5)

    # Fit X^2
    below_p95 = modelx[modelx < np.percentile(air_values, 95)]
    param = chi2.fit(below_p95, 32)
    shape_args, loc, scale = param[:-2], param[-2], param[-1]
    chi_pdf = chi2.pdf(x_grid, *shape_args, loc=loc, scale=scale)

    # Compute goodness-of-fit (gof) over the last ``kdethi`` grid points
    # (the whole grid when ``kdethi`` is 0).
    upper = slice(-kdethi, None)
    gof = float(np.abs(kde[upper] - chi_pdf[upper]).mean())
    if save_plot:
        out_file = plot_qi2(x_grid, kde, chi_pdf, modelx, kdethi)

    return gof, out_file
Ejemplo n.º 27
0
def plot_chisq_hist(ax, chisqs, nu):
    '''Plots a histogram of chisq values on ``ax``.
    Also overplots a chisq distribution for nu degrees of freedom, evaluated
    on [0, 6] and multiplied by a fixed factor of 100000.

    ax     -- axes to draw on; when None the current pyplot axes are used.
    chisqs -- chisq values to histogram (raw counts, not normalised).
    nu     -- degrees of freedom of the overplotted distribution.
    '''
    # Draw on the axes the caller passed in; fall back to the current axes
    # for callers that pass None.
    if ax is None:
        ax = plt.gca()
    ax.hist(chisqs, density=False, facecolor='g')
    ax.set_ylabel("Number of Occurrences")
    ax.set_xlabel("Chisq Value")
    x = np.linspace(0, 6, 100)
    ax.plot(x, chi2.pdf(x, nu) * 100000, 'r-', lw=5, alpha=0.6, label='chi2 pdf')
Ejemplo n.º 28
0
 def pdf_random_rotation(self, x, v, mu, kappa, n):
     """
     Gives back the probability of observing the vector x, such that its
     angle with v is coming from a Von Mises distribution with
     concentration kappa and its length coming from a chi squared
     distribution with the parameter n.

     x, v  -- vectors (numpy arrays); both are normalised internally.
     mu    -- accepted but not used.
     kappa -- concentration passed to the Von Mises pdf.
     n     -- degrees of freedom of the chi squared length term.

     NOTE(review): the Von Mises pdf is evaluated at the dot product of the
     unit vectors (the cosine of the angle), not at the angle itself --
     confirm this is intended.
     """
     # Measure the length before normalising; afterwards the norm of x is
     # always 1 and the length term would be constant.
     length = LA.norm(x,2)
     v = v/LA.norm(v,2)
     x = x/length
     ang = sum(v*x)
     return (.5/np.pi)*(chi2.pdf(n*length,n)*n)*(vonmises.pdf(ang, kappa))
Ejemplo n.º 29
0
def chi2_curve():
    """Build a small figure of the chi-squared pdf with 2 degrees of freedom.

    The curve is drawn on [0, 40] and a dashed vertical line marks the
    value 32.977.

    Returns
    -------
    The matplotlib figure (4 x 2 inches).
    """
    figure, axes = plt.subplots(1, 1)
    figure.set_size_inches(4, 2)

    grid = np.linspace(0, 40, 1000)
    density = chi2.pdf(grid, 2)
    axes.plot(grid, density, "r-", lw=2, alpha=0.6, label="chi2")
    axes.vlines(32.977, 0, 0.5, linestyle="--")

    axes.set_xlabel("Valeur du Chi2")
    axes.set_ylabel("Probabilité")
    return figure
Ejemplo n.º 30
0
def main():
    """Print a coarse probability table for a shifted, scaled chi-squared law.

    The distribution has 5 degrees of freedom, ``loc=20`` and ``scale=8``.
    Its pdf is sampled at the integers from 0 up to just past the 99th
    percentile.  Summing those samples approximates the probability (in
    percent, truncated) below ``loc`` and inside five equal-width slices
    above it; the remainder is reported for the upper tail.  Quantiles for
    several confidence levels are printed and the sampled pdf is plotted.
    """
    fig, ax = plt.subplots(1, 1)
    df = 5
    loc = 20
    scale = 8

    # Upper bound of the grid: first even integer above the 99th percentile.
    valmax = int(chi2.ppf(0.99, df, loc, scale)) + 1
    if valmax % 2 != 0:
        valmax = valmax + 1
    x = np.linspace(0, valmax, valmax + 1)

    proba = chi2.pdf(x, df, loc, scale)
    repartition = list(zip(x.tolist(), proba.tolist()))

    nbTranche = 5
    valmin = loc
    space = (valmax - valmin) / nbTranche

    remain = 100
    tranche0 = int(100 * sum(pro for val, pro in repartition if val < valmin))
    remain -= tranche0
    print("P(v < %d) = %d" % (valmin, tranche0))
    for traidx in range(nbTranche):
        deb = valmin + traidx * space
        fin = valmin + (traidx + 1) * space
        tranche = int(100 * sum(
            pro for val, pro in repartition if (val >= deb) and (val < fin)))
        remain -= tranche
        print("P(%d <= v < %d) = %d" % (deb, fin, tranche))
    # The upper tail is whatever percentage the slices did not account for.
    print("P(v >= %d) = %d" % (valmin + nbTranche * space, remain))

    for certitude in [0.6, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]:
        # The confidence level is fractional; "%d" would always print 0.
        print("certi = %.2f , val = %d" %
              (certitude, int(chi2.ppf(certitude, df, loc, scale))))
    ax.plot(x, proba, 'r-', lw=5, alpha=0.6, label='chi2 pdf')

    plt.show()
Ejemplo n.º 31
0
def dchisq(x,df,ncp=0):
    """
    Density of the chi-square distribution at ``x``.

    x   -- point(s) at which to evaluate the density
    df  -- degrees of freedom
    ncp -- non-centrality parameter; any non-zero value selects the
           non-central chi-square distribution
    """
    from scipy.stats import chi2,ncx2
    if ncp != 0:
        return ncx2.pdf(x=x,df=df,nc=ncp,loc=0,scale=1)
    return chi2.pdf(x=x,df=df,loc=0,scale=1)
def chi_squared(df):
    """Show the chi-squared pdf for ``df`` degrees of freedom.

    The curve is drawn between the 1st and 99th percentiles of the
    distribution.
    """
    figure, axes = plt.subplots(1, 1)

    # First four moments; computed but not displayed.
    mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

    # Probability density function between the 1% and 99% quantiles.
    lower = chi2.ppf(0.01, df)
    upper = chi2.ppf(0.99, df)
    grid = np.linspace(lower, upper, 100)
    axes.plot(grid, chi2.pdf(grid, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    plt.show()
Ejemplo n.º 33
0
def posterior_sigma(x, mu0, nu0, sigma0, y, kappa0):
    """Return the square root of a chi-squared density built from prior and data.

    With ``n = len(y)``, ``nu_n = nu0 + n`` and ``s = np.std(y)``:

        sigma_n^2 = (nu0*sigma0^2 + (n-1)*s^2
                     + kappa0*n*(mean(y) - mu0)/(kappa0 + n)) / nu_n

    The result is ``sqrt(chi2.pdf(1/nu_n, sigma_n^2))``.

    NOTE(review): ``x`` is accepted but never used; ``(mean(y) - mu0)`` is
    not squared; ``np.std`` uses its default ``ddof``; and the chi-squared
    pdf is evaluated at ``1/nu_n`` with ``sigma_n^2`` as its degrees of
    freedom.  Confirm each of these against the intended posterior before
    relying on this function.
    """
    n = len(y)
    sample_mean = np.mean(y)
    posterior_dof = nu0 + n
    spread = np.std(y)

    prior_term = nu0 * sigma0**2
    data_term = (n - 1.0) * spread**2
    shift_term = (kappa0 * n) * (sample_mean - mu0) / (kappa0 + n)
    sigma_n_squared = (prior_term + data_term + shift_term) / posterior_dof

    return np.sqrt(chi2.pdf(posterior_dof**(-1.0), sigma_n_squared))
Ejemplo n.º 34
0
 def report_lr(model_one_likelihood, model_two_likelihood):
     """Format a likelihood-ratio test between two models.

     The statistic is ``2 * model_one_likelihood - 2 * model_two_likelihood``
     (the arguments are presumably log-likelihoods, with model one the
     alternative and model two the null, as the report text says).  It is
     compared against a chi-squared distribution with 2 degrees of freedom.

     Returns a list ``[report_text, [statistic, p_value]]``.
     """
     x = -2*model_two_likelihood+2*model_one_likelihood
     # The p-value is the upper-tail probability of the statistic; the
     # density at the statistic is not a probability.
     p_value = chi2.sf(x,2)
     r = "-------------------------\n"
     r += "--Likelihood Ratio Test--\n"
     r += "-------------------------\n"
     r+="| The likelihood ratio of MODEL TWO/MODEL ONE (NULL/ALT) is %4.2f\n| This has a p_value of %s\n" % (x,p_value)
     r += "-------------------------\n"
     return [r,[x,p_value]]
Ejemplo n.º 35
0
def get_pdf(dist, x, df=1, loc=0, scale=1):
    """Evaluate the pdf of the named distribution at ``x``.

    Parameters
    ----------
    dist : 'normal', 'uniform' or 'chi2'.
    x : point(s) at which to evaluate the density.
    df : degrees of freedom, used by 'chi2' only.
    loc, scale : location and scale, used by 'normal' and 'uniform' only
        (the 'chi2' branch does not apply them).

    Returns
    -------
    The density values; for an unknown name a message is printed and zeros
    shaped like ``x`` are returned.
    """
    if dist == 'normal':
        return norm.pdf(x, loc=loc, scale=scale)
    if dist == 'uniform':
        return uniform.pdf(x, loc=loc, scale=scale)
    if dist == 'chi2':
        return chi2.pdf(x, df)

    print("No distribution found with name {:s}".format(dist))
    return np.zeros_like(x)
Ejemplo n.º 36
0
def stat(samples, sets):
	"""Draw sums of squared Gaussian samples and plot a chi-square density.

	Parameters:
		samples -- number of Gaussian draws squared and summed per set
		sets -- number of such sums to generate
	"""
	# Each draw is gauss(mean=1, sigma=random()), squared, then summed per set.
	S = [sum(random.gauss(1, random.random()) ** 2 for _ in range(samples))
	     for _ in range(sets)]
	hist, bins = np.histogram(S, bins = 100)

	# plt.bar(np.mean(S), hist, align='center')

	# Freeze a chi-square with `samples` degrees of freedom. The previous
	# `chi2.pdf(S)` omitted the required df argument (TypeError) and would
	# not have provided the `.dist` / `.pdf` attributes used below.
	# NOTE(review): the draws above are not standard normal, so S is not
	# exactly chi-square distributed; confirm the intended comparison.
	rv = chi2(samples)
	x = np.linspace(0, np.minimum(rv.dist.b, 3))
	h = plt.plot(x, rv.pdf(x))
Ejemplo n.º 37
0
	def confidence_update(self, update, betas):
		"""
		Scale the update step by a per-feature confidence and return new weights.

		For every feature, the two chi-square densities stored in
		self.P_beta under "<feature>0" and "<feature>1" are evaluated at the
		feature's beta and normalized into p_r0 / p_r1. A Newton-Raphson
		solve then yields the weight used to compute the confidence.
		---

		Params:
			update -- the full update step that will get weighted by confidence
			betas -- a vector of beta values for all features
		Returns:
			weights -- a vector of the new weights
		"""
		confidence = [1.0] * self.environment.num_features
		for i in range(self.environment.num_features):
			### Compute update using P(r|beta) for the beta estimate we just computed ###
			# Each P_beta entry supplies three values that are passed
			# positionally to chi2.pdf, i.e. as (df, loc, scale).
			mus1 = self.P_beta[self.environment.feature_list[i]+"1"]
			mus0 = self.P_beta[self.environment.feature_list[i]+"0"]
			# Normalize the two densities so that p_r0 + p_r1 == 1.
			p_r0 = chi2.pdf(betas[i],mus0[0],mus0[1],mus0[2]) / (chi2.pdf(betas[i],mus0[0],mus0[1],mus0[2]) + chi2.pdf(betas[i],mus1[0],mus1[1],mus1[2]))
			p_r1 = chi2.pdf(betas[i],mus1[0],mus1[1],mus1[2]) / (chi2.pdf(betas[i],mus0[0],mus0[1],mus0[2]) + chi2.pdf(betas[i],mus1[0],mus1[1],mus1[2]))
			# With l == pi the (l/math.pi) factor below evaluates to 1.
			l = math.pi

			# Newton-Raphson setup; define function, derivative, and call optimization method.
			def f_theta(weights_p):
				num = p_r1 * np.exp(weights_p * update[i])
				denom = p_r0 * (l/math.pi) ** (self.environment.num_features/2.0) * np.exp(-l*update[i]**2) + num
				return weights_p + self.step_size * num * update[i]/denom - self.environment.weights[i]
			def df_theta(weights_p):
				num = p_r0 * (l/math.pi) ** (self.environment.num_features/2.0) * np.exp(-l*update[i]**2)
				denom = p_r1 * np.exp(weights_p*update[i])
				return 1 + self.step_size * num / denom

			# Root search starts from the feature's current weight.
			weight_p = newton(f_theta,self.environment.weights[i],df_theta,tol=1e-04,maxiter=1000)

			# Confidence is the p_r1 term's share of the total at the solved weight.
			num = p_r1 * np.exp(weight_p * update[i])
			denom = p_r0 * (l/math.pi) ** (self.environment.num_features/2.0) * np.exp(-l*update[i]**2) + num
			confidence[i] = num/denom

		print "Here is weighted beta:", confidence
		# Gradient-style step, attenuated feature-by-feature by the confidence.
		weights = self.environment.weights - np.array(confidence) * self.step_size * update
		return weights
Ejemplo n.º 38
0
def chi2_distribution():
    """Compare simulated chi-square, t and F samples with their exact densities.

    Three figures are shown in turn. Each one plots the theoretical pdf and
    overlays a normalized histogram of 1000 values simulated from the
    distribution's definition in terms of normal / chi-square variates.

    NOTE(review): ``normed=True`` was removed from ``Axes.hist`` in
    Matplotlib 3.1; switch to ``density=True`` when running on newer versions.
    """
    # --- chi-square: sum of n squared standard normals -------------------
    fig, ax = plt.subplots(1, 1)
    df = 10
    x=np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x,df))

    y = []
    n=10
    for i in range(1000):
        chi2r=0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r=chi2r+r[j]**2
        y.append(chi2r)

    ax.hist(y, normed=True, alpha=0.2) 
    plt.show()
    
    # --- Student t: Z / sqrt(chi2_df / df) --------------------------------
    fig, ax = plt.subplots(1, 1)
    df = 10
    x=np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x,df))
    
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx/np.sqrt(ry/df)
        y.append(rt)

    ax.hist(y, normed=True, alpha=0.2)
    plt.show()
    
    # --- F: (chi2_dfn / dfn) / (chi2_dfm / dfm) ---------------------------
    fig, ax = plt.subplots(1, 1)
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        # The F variate is the plain ratio of the scaled chi-squares; taking
        # square roots (as was done before) simulates sqrt(F) instead.
        rf = (rx/dfn)/(ry/dfm)
        y.append(rf)

    ax.hist(y, normed=True, alpha=0.2)
    plt.show()
Ejemplo n.º 39
0
    def score_prior(self, ss, hps):
        """Return the log prior score of the component parameters in ``ss``.

        Each (mu, var) pair contributes the log chi-square density (with
        ``self.CHI_VAL`` degrees of freedom) of ``var / hps['var_scale']``.
        The score is ``-inf`` as soon as a ``mu`` lies outside the open
        interval (EPSILON, 1 - EPSILON) or a scaled variance falls below
        EPSILON. The Dirichlet log-density of ``ss['pi']`` under a symmetric
        concentration ``hps['dir_alpha']`` is added at the end.
        """
        total = 0

        for mu, var in zip(ss['mu'], ss['var']):
            # Reject means at (or beyond) the edges of the unit interval.
            if mu <= self.EPSILON or mu >= (1-self.EPSILON):
                return -np.inf
            scaled_var = var / hps['var_scale']
            if scaled_var < self.EPSILON:
                return -np.inf
            total += np.log(chi2.pdf(scaled_var, self.CHI_VAL))

        concentration = np.ones(hps['comp_k'])*hps['dir_alpha']
        return total + log_dirichlet_dens(ss['pi'], concentration)
Ejemplo n.º 40
0
def plot_chi2(df):
    """Plot the chi-square density with ``df`` degrees of freedom on [0, 14).

    The density is sampled every 0.1 and drawn on axes fixed to
    x in [0, 14] and y in [0, 0.5], with a grid.
    """
    x = list(np.arange(0, 14, 0.1))
    # Evaluate point by point, one density value per grid position.
    y = [chi2.pdf(point, df) for point in x]

    plt.plot(x, y)
    plt.axis([0, 14, 0, 0.5])
    plt.grid()
    plt.show()
Ejemplo n.º 41
0
def sampling_distribution():
    """Illustrate that (n - 1) * S^2 / sigma^2 follows a chi-square law.

    Plots the chi2(df) density and overlays a normalized histogram of 1000
    simulated values of df * S^2 / sigma^2, where S^2 is the sample variance
    of df + 1 normal draws. The figure is written to
    'sampling_distribution.png'.

    NOTE(review): ``normed=True`` was removed from ``Axes.hist`` in
    Matplotlib 3.1; switch to ``density=True`` when running on newer versions.
    """
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    sigma = 2
    x=np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))

    #simulate the sampling distribution
    y = []
    for i in range(1000):
        r = norm.rvs(loc=5, scale=sigma, size=df+1)
        # ddof=1 gives the unbiased sample variance; with the default
        # (ddof=0) the statistic is shrunk by df/(df+1) and is not chi2(df).
        rchi2 = df*np.var(r, ddof=1)/sigma**2
        y.append(rchi2)

    ax.hist(y, normed=True, alpha=0.2) 
    plt.savefig('sampling_distribution.png')
Ejemplo n.º 42
0
def rollQStat(n=25):
    """Compute Q statistics month by month and pickle them with their p-values.

    Starting at 1996-01-31 and stepping one month at a time up to the
    current time, ``Qstatistic`` is evaluated and each Q value is converted
    to an upper-tail p-value under a chi-square distribution with 6 degrees
    of freedom. The mapping date -> [p_values, Q values] is written to
    'Qstatistic.pkl'.

    Parameters:
        n -- unused; kept for interface compatibility
    """
    now = dt.datetime.now()
    precalculatedDict = {}
    iDate = dt.datetime(1996, 1, 31)
    while iDate < now:
        try:
            nameArray, Qarray = Qstatistic(iDate)
            # A p-value is the tail probability (sf), not the density (pdf).
            p_values = [chi2.sf(x, 6) for x in Qarray]
            precalculatedDict[iDate]= [p_values, Qarray]
            iDate = addMonths(iDate, 1)
            print(iDate.strftime('%Y/%m/%d %H:%M:%S'))
        except:
            # Log the exception type, then let it propagate unchanged.
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            raise
    # Context manager guarantees the pickle file is flushed and closed.
    with open('Qstatistic.pkl', 'wb') as Nameoutput:
        pickle.dump(precalculatedDict, Nameoutput)
Ejemplo n.º 43
0
def chi2_distribution():
    """Plot the chi2(10) density with a histogram of simulated values.

    Each of the 1000 simulated values is the sum of ten squared standard
    normal draws; the figure is shown interactively.
    """
    df = 10
    draws_per_sample = 10

    fig,ax = plt.subplots(1, 1)
    grid = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(grid, chi2.pdf(grid, df))

    simulated = []
    for _ in range(1000):
        normals = norm.rvs(size=draws_per_sample)
        # Left-to-right accumulation starting from 0.0.
        simulated.append(sum((value ** 2 for value in normals), 0.0))

    ax.hist(simulated, normed=True, alpha=0.2)
    plt.show()
Ejemplo n.º 44
0
def make_ts_hist(tab,masks,cols,labels,cumulative=True):
    """Plot normalized TS histograms for several selections plus a chi2 reference.

    Parameters:
        tab        -- table indexed first by a mask, then by a column name
        masks      -- one selection mask per histogram
        cols       -- column name, or list of column names (one per mask)
        labels     -- legend label for each histogram
        cumulative -- if true, plot the right-hand cumulative distribution

    The reference curve is labelled chi2_1 / 2. A second reference for two
    degrees of freedom existed only as disabled code and is not drawn.
    """
    # A single column name applies to every mask.
    if not isinstance(cols,list):
        cols = [cols]*len(masks)

    plt.figure()
    for mask, text, column in zip(masks,labels,cols):
        h = Histogram(Axis.create(0.0,20,100))
        h.fill(tab[mask][column])
        h = h.normalize()
        if cumulative:
            h = h.cumulative(lhs=False)
        h.plot(hist_style='step',alpha=0.3,linewidth=2,label=text)

    # Reference curve uses the binning of the last histogram drawn.
    reference_style = dict(label=r"$\chi^2_{1} / 2$", lw=1.5, c='k',
                           dashes=(5,2))
    axis0 = h.axis(0)
    axes = plt.gca()
    if cumulative:
        reference = 0.5*(1-chi2.cdf(axis0.edges[:-1],1))
    else:
        reference = axis0.width*chi2.pdf(axis0.center,1)
    axes.plot(axis0.center, reference, **reference_style)

    axes.set_yscale('log')
    axes.set_ylim(1E-4,1)
    axes.legend(frameon=False)
    axes.set_xlabel('TS$_\mathrm{ext}$')
    axes.set_ylabel('Cumulative Fraction')
Ejemplo n.º 45
0
def chi2_distribution():
    """Save a figure comparing the chi2(10) density with simulated values.

    Each of the 1000 simulated values is the sum of ten squared standard
    normal draws. The figure is written to 'chi2_distribution.png'.
    """
    df = 10
    n = 10

    fig, ax = plt.subplots(1, 1)
    # Theoretical density between the 1st and 99th percentiles.
    grid = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(grid, chi2.pdf(grid, df))

    # Simulated values, accumulated left to right starting from 0.0.
    simulated = []
    for _ in range(1000):
        normals = norm.rvs(size=n)
        simulated.append(sum((value ** 2 for value in normals), 0.0))

    ax.hist(simulated, normed=True, alpha=0.2)
    plt.savefig('chi2_distribution.png')
Ejemplo n.º 46
0
def stats(agg):
    """Compare 1- and 2-component Gaussian mixtures of best frequencies per region.

    For each region, filters with ``cc > 0.20`` are selected, their best
    frequencies are computed, and one- and two-component GMMs are fitted.
    The summed log-likelihoods, the likelihood-ratio statistic with its
    chi-square p-value, and both AIC values are printed.
    """
    lags_ms = (agg.lags / agg.sample_rate) * 1e3

    regions_to_use = ['L1', 'L2', 'L3', 'CM', 'NCM']
    for reg in regions_to_use:

        print('----------------  %s  ----------------' % reg)

        i = (agg.df.cc > 0.20) & (agg.df.region == reg)
        xi = agg.df[i].xindex.values
        filts = agg.filters[xi, :]

        # Keep positive frequencies only, as a column vector for the GMM.
        cfreqs = compute_best_freq(filts, agg.sample_rate, lags_ms)
        cfreqs = cfreqs[cfreqs > 0]
        cfreqs = cfreqs.reshape([len(cfreqs), 1])

        gmm1 = GMM(n_components=1)
        gmm1.fit(cfreqs)
        lk_null = gmm1.score(cfreqs).sum()
        aic_null = gmm1.aic(cfreqs)

        gmm2 = GMM(n_components=2)
        gmm2.fit(cfreqs)
        print('Center frequencies of 2-component GMM: %s' % (gmm2.means_.squeeze(),))
        print('Covariances:  %s' % (np.sqrt(gmm2.covars_.squeeze()),))
        lk_full = gmm2.score(cfreqs).sum()
        aic_full = gmm2.aic(cfreqs)

        lk_rat = -2*(lk_null - lk_full)

        chi_df = 2
        # The p-value of a likelihood-ratio test is the upper-tail
        # probability of the statistic, not the density at that point.
        pval = chi2.sf(lk_rat, chi_df)

        print('Null likelihood: %0.6f' % lk_null)
        print('Full likelihood: %0.6f' % lk_full)
        print('Likelihood Ratio: %0.6f' % lk_rat)
        print('p-value: %0.6f' % pval)

        print('Null AIC: %0.6f' % aic_null)
        print('Full AIC: %0.6f' % aic_full)
        print('Relative Likelihood (N=%d): %0.6f' % (i.sum(), np.exp((aic_full - aic_null) / 2.)))
Ejemplo n.º 47
0
 def PlotChi2(self):    
     """Plot the histogram of the chi2 image against the theoretical density.

     The histogram is computed over the pixels selected by the inverted
     change mask (``1 - chng``), normalized to unit area with the
     trapezoidal rule, and drawn together with the chi-square pdf for
     ``self.nvar`` degrees of freedom.
     """
     img = gfh.ReadData(self.chi2)
     hmin = 0#chi2.ppf(0.0001, df)
     hmax = 1000#chi2.ppf(0.9999, df)
     bins = 30000
     
     x = np.linspace(hmin,hmax, bins)
     
     # Mask is 1 where the change map is 0.
     chng = gfh.ReadData(self.chng)
     nochng = (1 - chng).astype(np.uint8)
     
     histr = cv2.calcHist([img],[0],nochng,[bins],[hmin,hmax])
     histr /= sp.integrate.trapz(np.squeeze(histr),x = x)
     
     plt.figure(4)
     plt.plot(x, chi2.pdf(x, self.nvar),'r-', lw=5, alpha=0.6, label='chi2 pdf')
     plt.plot(x,histr, label = 'chi2 histogram')
     # The legend must be attached before show(); calling it afterwards
     # leaves the displayed figure without one.
     plt.legend()
     plt.show()
Ejemplo n.º 48
0
def randomOfRandomness():
    """
    Create the initial population of agents and pickle it to disk.

    For each of 500 delta values, a total wealth is derived from a
    chi-square density and split among that delta's agents in proportion to
    random integer weights. Every agent's share is then divided at random
    into silver and real-estate money. The population is written to
    'simple_population' and the number of agents to 'numberOfagents'.
    """
    population = []
    agent_counter = 0
    for delta in np.linspace(0.00001, 0.061, 500):
        total_wealth = 1000 * chi2.pdf(600 * delta, 20)

        # How many agents share this delta.
        agent_count = random.randint(1, 1)

        # Random weights used to split the wealth among the agents.
        weights = [random.randint(1, 100) for _ in range(agent_count)]
        weight_sum = sum(weights)

        # Wealth divided among the agents belonging to this delta.
        shares = [weight * total_wealth / weight_sum for weight in weights]

        agents = []
        for share in shares:
            silver = random.uniform(0, share)
            agent_counter += 1
            agents.append([share - silver, silver, delta, agent_counter])

        population.append(agents)

    with open('simple_population', 'wb') as population_file:
        pickle.dump(population, population_file)

    with open('numberOfagents', 'wb') as count_file:
        pickle.dump(agent_counter, count_file)
simple_LL = -1*loss_function(simplified_results, func=simulate_simplified_dynamics)

# AIC = 2k - 2*logL, with k = 3 parameters for the full model and k = 2 for
# the simplified one.
full_AIC = 2*3 - 2*full_LL
simple_AIC = 2*2 - 2*simple_LL
delta_AIC =  simple_AIC - full_AIC
print(delta_AIC)


# The probability that the simplified model is better than the full model is given by its Akaike weight.

# In[39]:

1 / (1 + np.exp(.5*delta_AIC))


# To put this in perspective, this probability is roughly the magnitude of flipping a fair coin eight hundred times and it only ever coming up heads.

# In[40]:

.5**800


# Since the full and simplified model are nested, we can also perform a Likelihood Ratio test, where the test statistic $D$ is $\chi^2$ distributed with one degree of freedom.  We reject the null hypothesis that the simplified model is correct.

# In[41]:

from scipy.stats import chi2
D = 2*(full_LL - simple_LL)
# The test's p-value is the upper-tail probability P(chi2_1 >= D), i.e. the
# survival function; the density chi2.pdf(D, 1) is not a probability.
chi2_result = chi2.sf(D,1)
print(chi2_result)
Ejemplo n.º 50
0
@author: Flavio Lichtenstein
@local: Unifesp DIS - Bioinformatica
'''

import numpy as np
from scipy.stats import chi2
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Degrees of freedom used throughout this demo.
df = 2

# First four moments: mean, variance, skewness and kurtosis.
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

# Probability density function between the 1st and 99th percentiles.
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

# Calling the distribution object fixes its shape, location and scale
# parameters and returns a "frozen" RV object; plot its pdf on top.
rv = chi2(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Consistency check: cdf(ppf(q)) should reproduce q.
vals = chi2.ppf([0.001, 0.5, 0.999], df)
print(np.allclose([0.001, 0.5, 0.999], chi2.cdf(vals, df)))

# Random variates and their normalized histogram.
r = chi2.rvs(df, size=10000)
ax.hist(r, normed=True, histtype='stepfilled', alpha=0.2)
Ejemplo n.º 51
0
    def run(self, dataSlice, slicePoint=None):
        """Find the magnitude where completeness and contamination thresholds meet.

        Noise-only variances are simulated at a reference magnitude, turned
        into an interpolated CDF, and convolved with a chi-square signal
        variance distribution for a range of sampled magnitudes. The returned
        value is the sampled magnitude at which the interpolated difference
        between the two thresholds is closest to zero.

        Parameters:
            dataSlice  -- record structure indexed by ``self.m5col``
            slicePoint -- not used in this method
        Returns:
            The selected magnitude, or ``min(mag) - 1`` when fewer than two
            sampled magnitudes produced a finite result.
        """
        #Get the visit information
        m5 = dataSlice[self.m5col]
        #Number of visits
        N = len(m5)
        
        #magnitudes to be sampled
        mag = np.arange(16,np.mean(m5),0.5) 
        #hold the distance between the completeness and contamination goals.
        res = np.zeros(mag.shape) 
        #make them nans for now
        res[:] = np.nan 

        #hold the measured noise-only variances 
        noiseonlyvar = np.zeros(self.numruns)

        #Calculate the variance at a reference magnitude and scale from that
        m0=20.
        sigmaref = 0.2 * (10.**(-0.2*m5)) * (10.**(0.2*m0))

        #run the simulations
        #Simulate the measured noise-only variances at a reference magnitude
        for i in np.arange(self.numruns):
            # random realization of the Gaussian error distributions
            scatter = np.random.randn(N)*sigmaref 
            noiseonlyvar[i] = np.var(scatter) # store the noise-only variance
            
        #Since we are treating the underlying signal being representable by a 
        #fixed-width gaussian, its variance pdf is a Chi-squared distribution 
        #with the degrees of freedom = visits. Since variances add, the variance 
        #pdfs convolve. The cumulative distribution function of the sum of two 
        #random deviates is the convolution of one pdf with a cdf. 

        #We'll consider the cdf of the noise-only variances because it's easier 
        #to interpolate
        noisesorted = np.sort(noiseonlyvar)
        #linear interpolation (k=1, no smoothing) of the empirical CDF
        interpnoisecdf = UnivariateSpline(noisesorted,np.arange(self.numruns)/float(self.numruns),k=1,s=0)

        #We need a binned, signal-only variance probability distribution function for numerical convolution
        numsignalsamples = 100
        xsig = np.linspace(chi2.ppf(0.001, N),chi2.ppf(0.999, N),numsignalsamples)
        signalpdf = chi2.pdf(xsig, N)
        #correct x to the proper variance scale
        xsig = (self.signal**2.)*xsig/N
        pdfstepsize = xsig[1]-xsig[0]
        #Since everything is going to use this stepsize down the line,
        #normalize so the pdf integrates to 1 when summed (no factor of stepsize needed)
        signalpdf /= np.sum(signalpdf)

        #run through the sample magnitudes, calculate distance between cont 
        #and comp thresholds.
        #run until solution found.
        solutionfound=False
        
        for i,mref in enumerate(mag): 
            #i counts and mref is the currently sampled magnitude
            #Scale factor from m0
            scalefact = 10.**(0.4*(mref-m0))

            #Calculate the desired contamination threshold
            contthresh = np.percentile(noiseonlyvar,100.-100.*self.contamination)*scalefact

            #Realize the noise CDF at the required stepsize
            xnoise = np.arange(noisesorted[0]*scalefact,noisesorted[-1]*scalefact,pdfstepsize)
            
            #Only do calculation if near the solution:
            #(the noise grid must have more than a tenth as many points as the signal grid)
            if (len(xnoise) > numsignalsamples/10) and (not solutionfound):
                noisecdf = interpnoisecdf(xnoise/scalefact)
                noisepdf = (noisecdf[1:]-noisecdf[:-1]) #turn into a noise pdf
                noisepdf /= np.sum(noisepdf)
                xnoise = (xnoise[1:]+xnoise[:-1])/2. #from cdf to pdf conversion

                #calculate the convolution = signal+noise variance dist.
                #The longer array is passed first; both branches call np.convolve.
                convolution=0
                if len(noisepdf) > len(signalpdf):
                    convolution = np.convolve(noisepdf,signalpdf)
                else: 
                    convolution = np.convolve(signalpdf,noisepdf)
                xconvolved = xsig[0]+xnoise[0]+np.arange(len(convolution))*pdfstepsize

                #calculate the completeness threshold by inverting the combined CDF
                combinedcdf = np.cumsum(convolution)
                findcompthresh = UnivariateSpline(combinedcdf,xconvolved,k=1,s=0)
                compthresh = findcompthresh(1.-self.completeness)

                res[i] = compthresh - contthresh
                #a sign change means the thresholds have crossed; stop computing further magnitudes
                if res[i] < 0: solutionfound = True
        
        #interpolate for where the thresholds coincide
        #print res
        if np.sum(np.isfinite(res)) > 1:
            f1 = UnivariateSpline(mag[np.isfinite(res)],res[np.isfinite(res)],k=1,s=0)
            #sample the magnitude range at given resolution
            magsamples = np.arange(16,np.mean(m5),self.magres) 
            vardepth = magsamples[np.argmin(np.abs(f1(magsamples)))]
            return vardepth
        else:
            #fallback when fewer than two magnitudes gave a finite result
            return min(mag)-1
Ejemplo n.º 52
0
def errfunc(p,x,y):
    """Goodness measure of ``fitfunc(p, x)`` against data ``y`` under chi2(2) statistics.

    The data are expressed relative to the current model as
    ``2 * y / fitfunc(p, x)``, each value is assigned its chi-square
    (2 degrees of freedom) density, and the sum of the log densities is
    returned.
    """
    doubled = 2*y
    relative = doubled / fitfunc(p,x)  # values relative to the current model
    densities = chi2.pdf(relative, 2)  # density of each relative value
    return log(densities).sum()
# Annotate the data figure with the sample mean, save it and release it.
ax.text(8.9, 60., '$\hat{\mu} = \\frac{1}{15}\sum x_i = %.1f$' % np.mean(data), fontsize=18,
        bbox={'facecolor': 'none', 'pad':14, 'ec': 'r'})   
fig.subplots_adjust(bottom=0.15)
pl.savefig('../chisquared_data.pdf')
pl.show()
fig.clf()
pl.close(fig)

# new figure
fig = pl.figure(figsize=(10,5), dpi=100)

# get the chi-squared value from the data: squared deviations from the
# sample mean, scaled by sigmad**2
chisq = np.sum((data - np.mean(data))**2/(sigmad**2))

# density of a chi-squared distribution with nu degrees of freedom on [0, 50]
x = np.linspace(0., 50., 1000) 
c2pdf = chi2.pdf(x, nu)

# plot chi-squared pdf and mark the observed statistic with a dashed line
pl.plot(x, c2pdf)
pl.plot([chisq, chisq], [0, np.max(c2pdf)], 'k--')

# shade the upper tail (x >= chisq)
pl.fill_between(x, np.zeros(len(x)), c2pdf, where=x>=chisq, alpha=0.6, facecolor='green', interpolate=True)
ax = pl.gca()
ax.set_xlabel('$\chi^2$', fontsize=14)
ax.set_ylabel('$p(\chi^2)$', fontsize=14)
# NOTE(review): the annotation prints the statistic chisq itself, not a probability
ax.text(chisq+2, 0.07, '$p_{14}(\chi^2) = %.2f$' % chisq, fontsize=16)

# cumulative function; the p-value is read off the grid at the last point
# not exceeding chisq, so it is accurate only to the grid resolution
c2cdf = chi2.cdf(x, nu)
pvalue = 1.-c2cdf[x<=chisq][-1]
print pvalue
Ejemplo n.º 54
0
    def plot_A0(self):
        """Plot the distribution of A0 for the Gaussian and non-Gaussian cases.

        Histograms of ``self.gA0`` and of each entry in ``self.fgNLA0`` are
        drawn (depending on ``self.TYPE``), optionally with theoretical
        curves when ``self.theoryplot`` is set. Axis labels, a log y-scale,
        type-specific limits and a legend are applied at the end.
        """
        # Gaussian histogram is skipped for the gNL case.
        if self.TYPE != "gNL":
            plot_hist(plt, self.gA0, clr="skyblue", alp=ALPHA, ht="stepfilled")

        # Normal curve with the sample mean and standard deviation of gA0,
        # evaluated on a grid three times wider than the current x-limits.
        sG = np.sqrt(np.var(self.gA0))
        mG = np.mean(self.gA0)
        amin, amax = plt.xlim()
        alist = np.arange(3 * amin, amax * 3, amax / 250.0)
        theoryGdist = norm.pdf(alist, loc=mG, scale=sG)
        if self.TYPE != "gNL":
            plt.plot(
                alist, theoryGdist, self.clrs[0], linestyle=self.ls[0], linewidth=LW, label=self.TYPELABEL + "$=0$"
            )

        for i in range(len(self.fgnls)):
            # Label the histogram only when no theory curve will carry the label.
            if self.theoryplot == False:
                lbl = self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i])
            else:
                lbl = None

            if self.TYPE == "fNL":
                plot_hist(plt, self.fgNLA0[i], clr=self.clrs[i + 1], alp=ALPHA, labl=lbl, ht="stepfilled")
            if self.TYPE == "gNL":
                # plot_hist(plt, self.fgNLA0[i]-self.gA0+6*self.fgnls[i]*self.efolds*self.A0const-6*self.fgnls[i]*self.phisq, clr=self.clrs[i+1], alp=ALPHA, labl=lbl, ht='stepfilled')
                # `neg` is computed but not used below.
                neg = np.min(self.fgNLA0[i] - self.gA0)
                const = 9 * 6.0 * self.fgnls[i] * 1.5e-8
                # scl=6.0*self.A0const*self.Nconst*self.fgnls[i]
                # Histogram of the shifted and rescaled difference from the Gaussian case.
                plot_hist(
                    plt,
                    (self.fgNLA0[i] - self.gA0 + const) / (1 - 9.0 * 3.0 * self.fgnls[i] * 1.5e-8),
                    clr=self.clrs[i + 1],
                    alp=ALPHA,
                    labl=lbl,
                    ht="stepfilled",
                )

            # Re-read the limits: the histogram above may have changed them.
            amin, amax = plt.xlim()
            alist = np.arange(3 * amin, amax * 3, 0.001)
            if self.theoryplot:
                if self.TYPE == "fNL":
                    # Normal curve with the same mean and a broadened variance.
                    theorynGdist = norm.pdf(
                        alist,
                        loc=mG,
                        scale=np.sqrt(16.0 * self.A0const * self.Nconst * self.fgnls[i] ** 2.0 + sG ** 2.0),
                    )
                if self.TYPE == "gNL":
                    scl = 6.0 * self.A0const * self.Nconst * self.fgnls[i]
                    # scl=2.0*self.A0const*self.efolds*self.fgnls[i]
                    # One-degree-of-freedom chi-square density with scale `scl`,
                    # multiplied by the sign of fgnls[i].
                    theorynGdist = np.sign(self.fgnls[i]) * chi2.pdf(alist, 1, scale=scl)
                    # new
                    """
                    sigmaphi00=np.sqrt(self.A0const*self.efolds)
                    a0=24.*np.pi*self.fgnls[i]
                    theorynGdist=1./a0/np.sqrt(2.*np.pi)/sigmaphi00 * np.sqrt(a0/(alist+a0*sigmaphi00^2))*np.exp(-(alist+a0*sigmaphi00**2.0)/(2.*a0*sigmaphi00**2.0))
                    self.theorynGdist=theorynGdist
                    """
                LW1 = self.get_LW1(i)

                plt.plot(
                    alist,
                    theorynGdist,
                    self.clrs[i + 1],
                    linestyle=self.ls[i + 1],
                    linewidth=LW1,
                    label=self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i]),
                )

        plt.xlabel(r"$A_0$")
        plt.ylabel(r"$p(A_0)$")
        plt.yscale("log")
        # Fixed display windows per analysis type.
        if self.TYPE == "fNL":
            plt.xlim(-1.0, 1.0)
            plt.ylim(0.1, 100)
        if self.TYPE == "gNL":
            # plt.xscale('log')
            plt.ylim(0.05, 1000)
            plt.xlim(-0.02, 0.25)

        plt.legend()
Ejemplo n.º 55
0
# Inverse of the matrix square root of Sigma.
Q = inv(sqrtm(Sigma))  

# == Collect sqrt(n) times the sample mean, one column per replication == #
error_obs = np.empty((2, replications))
for i in range(replications):
    # == Draw one sequence of shocks == #
    W = dw.rvs(n)
    U = du.rvs(n)
    # == Stack the n observations of the bivariate vector (W, W + U) == #
    X = np.empty((2, n))
    X[0, :] = W
    X[1, :] = W + U
    # == Store the i-th observation of Y_n == #
    error_obs[:, i] = np.sqrt(n) * X.mean(axis=1)

# == Premultiply by Q, then take the squared norm of each column == #
temp = np.dot(Q, error_obs)
chisq_obs = np.sum(temp**2, axis=0)

# == Histogram of the squared norms against the chi-squared(2) density == #
xmax = 8
xgrid = np.linspace(0, xmax, 200)
lb = "Chi-squared with 2 degrees of freedom"

fig, ax = plt.subplots()
ax.set_xlim(0, xmax)
ax.plot(xgrid, chi2.pdf(xgrid, 2), 'k-', lw=2, label=lb)
ax.legend()
ax.hist(chisq_obs, bins=50, normed=True)

plt.show()
Ejemplo n.º 56
0
import matplotlib.pyplot as plt
import numpy as np
from numpy import linspace
from scipy.stats import chisquare, chi2
from scipy.stats.kde import gaussian_kde

# m repetitions, each drawing n integers uniformly from 1..k.
m = 1000
n = 100
k = 10

# NOTE(review): chisquare() is applied to the raw draws rather than to the
# per-category counts; confirm this is the intended statistic.
y = []
for _ in range(m):
    x = [random.randint(1, k) for _ in range(n)]
    y.append(chisquare(x)[0])

# Center and rescale the statistics. The scale is computed once (it was
# previously re-evaluated for every element, which is quadratic in m) and
# is unchanged in value: sqrt of the standard deviation of y.
mean_y, std_y = np.mean(y), np.std(y)
scale = sqrt(std_y)
y = [(value - mean_y) / scale for value in y]

# Shift so that the smallest value sits at zero.
t = min(y)
y = [value - t for value in y]

# Kernel density estimate of the transformed statistics.
pdf = gaussian_kde(y)
a = linspace(min(y), max(y), len(y) // 10)

fig, ax = plt.subplots(2, 1)
fig.subplots_adjust(wspace=0)

# Top: estimated density; bottom: chi-square density with k - 1 degrees of freedom.
ax[0].plot(a, pdf(a))
ax[1].plot(a, chi2.pdf(a,k-1))

plt.show()
# Grid between the 1st and 99th percentiles of a chi-square with `dof`
# degrees of freedom (not used by the remaining lines of this fragment).
xu = np.linspace(chi2.ppf(0.01, dof), chi2.ppf(0.99, dof), 100)
# Dashed vertical marker at dof/df.
ax.axvline(x=dof/df,color='k', linestyle='dashed',lw=4, label='UGC11680NED01 $\chi^2$')
# Weighted, un-normalized step histogram of chi_sfh/df.
# NOTE(review): `normed` was removed from Axes.hist in Matplotlib 3.1 -- confirm the target version.
ax.hist(chi_sfh/df,bins=30, normed=False,weights=weights, histtype='step', lw=3,label='All AGNs $\chi^2$ reduced distribution')
ax.legend(loc='best', frameon=False)
ax.set_ylabel('Probability density $\chi ^2$')
ax.set_xlabel('$x$')
plt.show()

1+ 1./2. + 1./3. + 1./4. +1./5. + 1./6. +1./7. +1./8. + 1./9. + 1./10. + 1./11. +1./12. +1./13. + 1./14. + 1./15. +1./16. +1./17. +1./18. +1./19. +1./20. +1./21.+ 1./22. +1./23. +1./24.

3.7759581777535067

2(1368) = 2736


n=1368.9986218003824*(chi2.pdf(x, df))


2736+3.7759581777535067

varianza= 52.34286921995921
media= 1368


mass 8, color 1

2736+2.45
varianza= 52.33020160480943
media= 1368

mass 9 color 1
Ejemplo n.º 58
0
def test():
	"""Plot the chi-square density on [0.1, 25] for several degrees of freedom."""
	grid = linspace(0.1, 25, 200)
	# One curve per degrees-of-freedom value, drawn in this order.
	for dof in (1, 2, 3, 5, 10, 50):
		density = chi2.pdf(grid, dof)
		plot(grid, density)
Ejemplo n.º 59
0
def draw_figures():

    pfile = '/auto/tdrive/mschachter/data/aggregate/decoders_pairwise_coherence_multi_freq.h5'

    agg = AggregatePairwiseDecoder.load(pfile)

    nbands = agg.df['band'].max()
    sample_rate = 381.4697265625
    freqs = get_freqs(sample_rate)

    g = agg.df.groupby(['bird', 'block', 'segment', 'hemi'])

    """
    # TODO: compute the average likelihood ratio between intecept-only and full model for all sites!
    i = (agg.df['bird'] == 'GreBlu9508M') & (agg.df['block'] == 'Site4') & (agg.df['segment'] == 'Call1') & (agg.df['hemi'] == 'L') & (agg.df['band'] == 0)
    assert i.sum() == 1
    full_likelihood_for_null = agg.df['likelihood'][i].values[0]
    null_likelihood = 1.63  # for GreBlu9508_Site4_Call1_L
    null_likelihood_ratio = 2*(null_likelihood - full_likelihood_for_null)
    print 'full_likelihood_for_null=',full_likelihood_for_null
    print 'null_likelihood=',null_likelihood
    print 'null_likelihood_ratio=',null_likelihood_ratio
    """

    full_likelihoods = list()
    likelihood_ratios = list()
    pccs = list()
    pcc_thresh = 0.25
    single_band_likelihoods = list()
    single_band_pccs = list()
    for (bird,block,seg,hemi),gdf in g:
        # get the likelihood of the fullmodel
        i = gdf['band'] == 0
        assert i.sum() == 1

        num_samps = gdf[i]['num_samps'].values[0]
        print 'num_samps=%d' % num_samps
        full_likelihood = -gdf[i]['likelihood'].values[0] * num_samps
        pcc = gdf[i]['pcc'].values[0]

        if pcc < pcc_thresh:
            continue

        full_likelihoods.append(full_likelihood)
        pccs.append(pcc)

        # get the likelihood per frequency band
        ratio_by_band = np.zeros(nbands)
        single_likelihood_by_band = np.zeros(nbands)
        single_pcc_band = np.zeros(nbands)
        for k in range(nbands):
            i = (gdf['band'] == k+1) & (gdf['exfreq'] == True)
            assert i.sum() == 1

            num_samps2 = gdf[i]['num_samps'].values[0]
            assert num_samps2 == num_samps
            leftout_likelihood = -gdf[i]['likelihood'].values[0] * num_samps

            i = (gdf['band'] == k+1) & (gdf['exfreq'] == False)
            assert i.sum() == 1

            num_samps3 = gdf[i]['num_samps'].values[0]
            assert num_samps3 == num_samps2
            single_leftout_likelihood = -gdf[i]['likelihood'].values[0] * num_samps

            pcc = gdf[i]['pcc'].values[0]

            print '(%s,%s,%s,%s,%d) leftout=%0.6f, full=%0.6f, single=%0.6f, single_pcc=%0.6f, num_samps=%d' % \
                  (bird, block, seg, hemi, k, leftout_likelihood, full_likelihood, single_leftout_likelihood, pcc, num_samps)

            # compute the likelihood ratio
            lratio = -2*(leftout_likelihood - full_likelihood)
            ratio_by_band[k] = lratio
            single_likelihood_by_band[k] = single_leftout_likelihood
            single_pcc_band[k] = pcc

        likelihood_ratios.append(ratio_by_band)
        single_band_likelihoods.append(single_likelihood_by_band)
        single_band_pccs.append(single_pcc_band)

    pccs = np.array(pccs)
    likelihood_ratios = np.array(likelihood_ratios)
    full_likelihoods = np.array(full_likelihoods)
    single_band_likelihoods = np.array(single_band_likelihoods)
    single_band_pccs = np.array(single_band_pccs)

    # exclude segments whose likelihood ratio goes below zero
    # i = np.array([np.any(lrat < 0) for lrat in likelihood_ratios])
    i = np.ones(len(likelihood_ratios), dtype='bool')
    print 'i.sum()=%d' % i.sum()

    # compute significance threshold
    x = np.linspace(1, 150, 1000)
    df = 12
    p = chi2.pdf(x, df)
    sig_thresh = max(x[p > 0.01])

    # compute mean and std
    lrat_mean = likelihood_ratios[i, :].mean(axis=0)
    lrat_std = likelihood_ratios[i, :].std(axis=0, ddof=1)

    single_l_mean = single_band_likelihoods[i, :].mean(axis=0)
    single_l_std = single_band_likelihoods[i, :].std(axis=0, ddof=1)

    single_pcc_mean = single_band_pccs[i, :].mean(axis=0)
    single_pcc_std = single_band_pccs[i, :].std(axis=0, ddof=1)

    fig = plt.figure(figsize=(24, 16))
    plt.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.99, hspace=0.40, wspace=0.20)

    ax = plt.subplot(2, 3, 1)
    plt.plot(full_likelihoods, pccs, 'go', linewidth=2.0)
    plt.xlabel('log Likelihood')
    plt.ylabel('PCC')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 2)
    for k,lrat in enumerate(likelihood_ratios[i, :]):
        plt.plot(freqs, lrat, '-', linewidth=2.0, alpha=0.7)
        plt.xlabel('Frequency (Hz)')
        plt.ylabel('Likelihood Ratio')
        plt.axis('tight')

    ax = plt.subplot(2, 3, 4)
    nsamps = len(likelihood_ratios)
    plt.errorbar(freqs, single_pcc_mean, yerr=single_pcc_std/np.sqrt(nsamps), ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('PCC')
    plt.title('Mean Single Band Decoder PCC')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 5)
    plt.errorbar(freqs, single_l_mean, yerr=single_l_std/np.sqrt(nsamps), ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('log Likelihood')
    plt.title('Mean Single Band Decoder Likelihood')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 6)
    plt.errorbar(freqs, lrat_mean, yerr=lrat_std/np.sqrt(nsamps), ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.plot(freqs, np.ones_like(freqs)*sig_thresh, 'k--', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Likelihood Ratio')
    plt.title('Mean Likelihood Ratio')
    plt.axis('tight')
    plt.ylim(0, lrat_mean.max())



    fname = os.path.join(get_this_dir(), 'figs.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()