def test_chisquare_masked_arrays():
    # The other tests were taken from the tests for stats.chisquare, so
    # they don't test the function with masked arrays.  Here masked arrays
    # are tested.
    obs = np.array([[8, 8, 16, 32, -1], [-1, -1, 3, 4, 5]]).T
    mask = np.array([[0, 0, 0, 0, 1], [1, 1, 0, 0, 0]]).T
    mobs = ma.masked_array(obs, mask)
    expected_chisq = np.array([24.0, 0.5])

    chisq, p = mstats.chisquare(mobs)
    assert_array_equal(chisq, expected_chisq)
    assert_array_almost_equal(
        p, stats.chisqprob(expected_chisq, mobs.count(axis=0) - 1))

    chisq, p = mstats.chisquare(mobs.T, axis=1)
    assert_array_equal(chisq, expected_chisq)
    assert_array_almost_equal(
        p, stats.chisqprob(expected_chisq, mobs.T.count(axis=1) - 1))

    # When axis=None, the two values should have type np.float64.
    chisq, p = mstats.chisquare([1, 2, 3], axis=None)
    assert_(isinstance(chisq, np.float64))
    assert_(isinstance(p, np.float64))
    assert_equal(chisq, 1.0)
    assert_almost_equal(p, stats.chisqprob(1.0, 2))
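# Note: nearly every snippet in this section calls stats.chisqprob, which was
# deprecated in SciPy 0.17 and removed in SciPy 1.0; scipy.stats.chi2.sf(x, df)
# computes the same upper-tail probability.  A minimal compatibility sketch,
# assuming only that scipy is installed (the LRT snippet near the end of this
# section patches the name the same way):
from scipy import stats

if not hasattr(stats, 'chisqprob'):
    # survival function of the chi-squared distribution: P(X2 > x) with df dof
    stats.chisqprob = stats.chi2.sf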
def sp_tests(reg):
    """
    Calculates tests for spatial dependence in Probit models

    Parameters
    ----------
    reg         : regression object
                  output instance from a probit model
    """
    if reg.w:
        w = reg.w.sparse
        Phi = reg.predy
        phi = reg.phiy

        # Pinkse_error:
        Phi_prod = Phi * (1 - Phi)
        u_naive = reg.u_naive
        u_gen = reg.u_gen
        sig2 = np.sum((phi * phi) / Phi_prod) / reg.n
        LM_err_num = np.dot(u_gen.T, (w * u_gen)) ** 2
        trWW = np.sum((w * w).diagonal())
        trWWWWp = trWW + np.sum((w * w.T).diagonal())
        LM_err = float(1.0 * LM_err_num / (sig2 ** 2 * trWWWWp))
        LM_err = np.array([LM_err, chisqprob(LM_err, 1)])

        # KP_error:
        moran = moran_KP(reg.w, u_naive, Phi_prod)

        # Pinkse-Slade_error:
        u_std = u_naive / np.sqrt(Phi_prod)
        ps_num = np.dot(u_std.T, (w * u_std)) ** 2
        trWpW = np.sum((w.T * w).diagonal())
        ps = float(ps_num / (trWW + trWpW))
        ps = np.array([ps, chisqprob(ps, 1)])  # chi-square instead of bootstrap
    else:
        raise Exception("W matrix not provided to calculate spatial test.")
    return LM_err, moran, ps
def inspect_output_by_filter(self, rez, dat, doplot=False, test=False,
                             sig_clips=[5, 3, 2], sig_test=[False, False, True]):
    p = list(rez.values())[0][1]
    myoutput = list(rez.values())[0][0]
    new = list(rez.values())[0][2]
    filt = list(rez.keys())[0]
    ret = {}
    ret.update({"all": self._extract_info(p, myoutput.sd_beta, myoutput)})
    err = dat[2]
    tmp = (dat[1] - self.modelfunc_small_te(p, dat[0])) / err
    dof = tmp.shape[0] - myoutput.beta.shape[0]
    chisq = (tmp**2).sum()
    ret['all'].update({"ndata": dat[0].shape[0],
                       "chisq": chisq, "dof": dof,
                       "p_chi": chisqprob(chisq, dof),
                       "normalcy_prob": normaltest(tmp)[1]})

    for s in enumerate(sig_clips):
        if sig_test[s[0]] and not test:
            continue
        sig = s[1]
        # get the indices of those inside and outside the clip area
        tmpisig = (abs(tmp) < sig).nonzero()[0]
        tmpisige = (abs(tmp) > sig).nonzero()[0]
        frac_less_than_sig = float(tmpisig.shape[0]) / dat[0].shape[0]
        # print(frac_less_than_sig)
        if frac_less_than_sig < 1.0:
            out = self._filt_run([dat[0][tmpisig], dat[1][tmpisig], err[tmpisig]],
                                 filt, do_sim=False, vplot=False)
            p = out[1]
            myoutput = out[0]
            t = "-test" if sig_test[s[0]] else ""
            ret.update({"sig" + str(sig) + t:
                        self._extract_info(p, myoutput.sd_beta, myoutput)})
            tmp = (dat[1][tmpisig] -
                   self.modelfunc_small_te(p, dat[0][tmpisig])) / err[tmpisig]
            dof = tmp.shape[0] - myoutput.beta.shape[0]
            chisq = (tmp**2).sum()
            try:
                ntest = normaltest(tmp)[1]
            except Exception:
                ntest = 0.0
            ret["sig" + str(sig) + t].update({"ndata": dat[0][tmpisig].shape[0],
                                              "chisq": chisq, "dof": dof,
                                              "p_chi": chisqprob(chisq, dof),
                                              "normalcy_prob": ntest,
                                              "frac_data_remaining": frac_less_than_sig})
            if doplot:
                plot(dat[0][tmpisige], dat[1][tmpisige], ".")
    return ret
def _calculate_LRTs(self):
    """Run likelihood ratio tests if there are enough results"""
    if all([m in self.keys() for m in [1, 2]]):
        D = -2 * self[1].lnL + 2 * self[2].lnL
        pval = chisqprob(D, 2)
        self.LRT_m1m2 = (D, pval)
    if all([m in self.keys() for m in [7, 8]]):
        D = -2 * self[7].lnL + 2 * self[8].lnL
        pval = chisqprob(D, 2)
        self.LRT_m7m8 = (D, pval)
def LR(self):
    try:
        return self._cache['LR']
    except AttributeError:
        self._cache = {}
        P = 1.0 * np.sum(self.y) / self.n
        LR = float(-2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P))
                         - self.logl))
        self._cache['LR'] = (LR, chisqprob(LR, self.k))
    except KeyError:
        P = 1.0 * np.sum(self.y) / self.n
        LR = float(-2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P))
                         - self.logl))
        self._cache['LR'] = (LR, chisqprob(LR, self.k))
    return self._cache['LR']
def pfisher(pvalues):
    """
    Combine independent P-values into one according to

    Fisher, R. A. (1948) Combining independent tests of significance.
    American Statistician, vol. 2, issue 5, page 30.

    ('Fisher method' or 'inverse ChiSquare method')  See also book:
    Walter W. Piegorsch, A. John Bailer: Analyzing Environmental Data.
    Wiley 2005

    @param pvalues: list of independent P-values
    @type  pvalues: [ float ]

    @return: P-value
    @rtype: float
    """
    ## stats.mannwhitneyu minimal P ~ stats.zprob( 8.2 );
    ## all below becomes 0. which is not handled by the fisher test
    clipped = N.clip(pvalues, 1.0e-16, 1.0)

    x2 = -2 * N.sum(N.log(clipped))

    if not USING_SCIPY:
        x2 = float(x2)

    return stats.chisqprob(x2, 2 * len(pvalues))
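# Hypothetical usage sketch for pfisher above, with made-up p-values (`N` is
# the snippet's own numpy alias).  Fisher's statistic -2*sum(log(p)) is
# chi-squared with 2k dof, so the combined P-value can be cross-checked
# directly against scipy.stats.chi2.sf:
import numpy as N
from scipy.stats import chi2

pvals = [0.01, 0.20, 0.45]
x2 = -2 * N.sum(N.log(N.clip(pvals, 1.0e-16, 1.0)))
print(chi2.sf(x2, 2 * len(pvals)))  # same value pfisher(pvals) returns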
def _calculate_hwe(self, snp, genotypes):
    """
    Calculates p-value for HWE using ChiSquare statistic: remove missing,
    get observed counts, get observed frequencies, get expected counts,
    calculate test values using (O-E)**2 / E and return ChiSquare
    probability with 1 degree of freedom (bi-allelic SNP).
    """
    adjusted_samples = self.sample_size - genotypes.count(self.missing)
    hetero_obs, major_obs, minor_obs = self._get_observed(genotypes)

    try:
        p = (major_obs + (hetero_obs / 2)) / adjusted_samples
        q = (minor_obs + (hetero_obs / 2)) / adjusted_samples
    except ZeroDivisionError:
        # print("Detected complete missing data in SNP:", snp)
        return 0

    if (p + q) != 1:
        raise ValueError("Sum of observed allele frequencies (p + q) does not equal one.")

    hetero_exp, major_exp, minor_exp = self._get_expected(p, q, adjusted_samples)

    try:
        hetero_test = ((hetero_obs - hetero_exp) ** 2) / hetero_exp
        major_test = ((major_obs - major_exp) ** 2) / major_exp
        minor_test = ((minor_obs - minor_exp) ** 2) / minor_exp
    except ZeroDivisionError:
        return 0

    return stats.chisqprob(sum([hetero_test, major_test, minor_test]), 1)
def hardy_weinberg_asymptotic(obs_het, obs_a, obs_b):
    obs_het = float(obs_het)
    obs_a = float(obs_a)
    obs_b = float(obs_b)

    sample_size = obs_het + obs_a + obs_b
    p = ((2 * obs_a) + obs_het) / (2 * sample_size)
    q = 1 - p

    exp_a = p * p * sample_size
    exp_b = q * q * sample_size
    exp_ab = 2 * p * q * sample_size

    # get chiSquare values: (O - E)**2 / E for each genotype class
    if exp_a == 0:
        chi_a = 0
    else:
        chi_a = ((obs_a - exp_a) ** 2.0) / exp_a
    if exp_b == 0:
        chi_b = 0
    else:
        chi_b = ((obs_b - exp_b) ** 2.0) / exp_b
    if exp_ab == 0:
        chi_ab = 0
    else:
        chi_ab = ((obs_het - exp_ab) ** 2.0) / exp_ab

    chi_sq_total = chi_a + chi_b + chi_ab
    return stats.chisqprob(chi_sq_total, 1)
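# Quick sanity check for hardy_weinberg_asymptotic above, using made-up counts
# at exact Hardy-Weinberg proportions (p = q = 0.5): expected counts equal the
# observed ones, the statistic is 0, and the returned probability is 1.
print(hardy_weinberg_asymptotic(50, 25, 25))  # -> 1.0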
def gof(self, x, y, ye):
    '''
    Computes GoF test statistics and other diagnostic tests

    Returns:
    --------
    - GoF test: Chi^2, p-value, and ddof
    - Normality of residuals: K^2 and p-value
    '''
    resid = y - self(x)
    chisq = np.sum((resid / ye) ** 2)
    # number of estimated parameters
    ddof = len(x) - len([e for e in self.errors() if e])
    chisq_pvalue = chisqprob(chisq, ddof)
    gof = (chisq, chisq_pvalue, ddof)
    resid = normaltest(resid)

    ym = y.mean()
    SStot = np.sum((y - ym) ** 2)
    SSerr = np.sum((y - self(x)) ** 2)
    Rsquared = 1.0 - SSerr / SStot

    # Besides being buggy, this test for homoscedasticity is supposed to work
    # only for linear regressions, hence is not suited for our case, but I'll
    # keep it here until I figure out an alternative.  Remember to uncomment
    # the import for OLS on top.
    # regresults = OLS(resid ** 2, np.c_[x, x**2]).fit()
    # LM = regresults.rsquared
    # LM_pvalue = chisqprob(LM, len(x) - ddof)
    # white = (LM, LM_pvalue)
    # return gof, resid, white
    return gof, resid, Rsquared
def test_for_pihm_w_likelihood(self, guess=[10, -5],
                               k_array=np.linspace(0.05, 2, 100),
                               fixed_pre=False, disp=True):
    """
    Test a dataset for parasite induced host mortality using the
    likelihood method.

    This method compares a reduced model (negative binomial distribution)
    to a full model (negative binomial with PIHM).  The two models are
    nested and differ by two parameters: a and b.  This amounts to fitting
    a negative binomial distribution to the data, then fitting the full
    model to the data and comparing likelihoods using a likelihood ratio
    test.  The likelihood ratio should be approximately chi-squared with
    dof equal to the difference in the number of parameters.

    Parameters
    ----------
    guess : list
        Guesses for a and b
    k_array : array
        Array of values over which to search for the best-fit k
    fixed_pre : bool
        If True, the premortality parameters (mup and kp) are fixed.
        Else, they are jointly estimated from the data.
    disp : bool
        If True, a convergence message is printed.  If False, no
        convergence message is printed.

    Returns
    -------
    : tuple
        chi-squared value, p-value, full nll, reduced nll, full parameters
    """
    # No params are known
    if not fixed_pre:
        # Get full nll
        params = self.likelihood_method(full_fit=True, guess=guess, disp=disp)
        full_nll = likefxn1(params, self.data)

        mle_fit = mod.nbinom.fit_mle(self.data, k_array=k_array)
        red_nll = comp.nll(self.data, mod.nbinom(*mle_fit))

    # Params are known
    else:
        params = self.likelihood_method(full_fit=False, guess=guess, disp=disp)
        full_nll = likefxn2(params[2:], self.data, self.mup, self.kp)
        red_nll = comp.nll(self.data, mod.nbinom(self.mup, self.kp))

    # Approximately chi-squared...though this is a large sample size approx.
    chi_sq = 2 * (-full_nll - (-red_nll))
    prob = chisqprob(chi_sq, 2)

    return chi_sq, prob, full_nll, red_nll, params
def get_pValue(self, mutheta, sigma=None):
    minusPointLogLike = self.like(mutheta, sigma)
    minusMaxLogLike = self.ml
    size = 2
    CS = 2.0 * (minusPointLogLike - minusMaxLogLike)
    pValue = stats.chisqprob(CS, size)
    return pValue
def fit1D(binnedE, binnedT, fracsE, fracsT, nevents, PearsonErrs=True, debug=False):
    # Concatenate data and prediction vectors
    datavec = np.append(binnedE, binnedT)
    predvec = [np.append(fracsE[0], fracsT[0]),
               np.append(fracsE[1], fracsT[1])]

    # Define inputs for minimizer
    predfunc = lambda p: p[0] * predvec[0] + p[1] * predvec[1]
    if PearsonErrs:
        func = lambda p: (datavec - predfunc(p)) / np.sqrt(predfunc(p))
    else:
        func = lambda p: (datavec - predfunc(p)) / np.sqrt(datavec)

    pfit, pcov, infodict, errmsg, success = sp.optimize.leastsq(
        func, nevents, full_output=1)

    chi2 = sum([elem**2 for elem in infodict['fvec']])
    # mychi2 = sum([(datavec[i] - predfunc(pfit)[i])**2. / predfunc(pfit)[i]
    #               for i in xrange(len(datavec))])
    ### This just equals 'chi2' calculated above
    dof = datavec.size - 2
    pval = st.chisqprob(chi2, dof)

    if debug:
        print('---------------------- 1-D Fit --------------------------------')
        print('Best fits: %s' % pfit)
        print('Cov. mat.: %s' % pcov)
        print('Chi^2:     %s' % chi2)
        print('d.o.f.:    %s' % dof)
        print('P-value:   %s' % pval)
        print('---------------------------------------------------------------')

    return pfit, pcov, chi2, pval
def chisq_poisson(data):
    '''
    Tests if the data comes from a Poisson distribution.  This is done
    using the Pearson Chi-Square test.  Each value from the data given is
    treated like a categorical attribute, where the number of occurrences
    of the value is tested against the expected occurrences if the data
    came from a Poisson distribution.

    The hypotheses are:
        * H0 (null) - The data comes from a Poisson distribution
        * H1 - The data does NOT come from a Poisson distribution

    Arguments
    ---------
    data: array like
        Array with observations

    Returns
    -------
    (chi-square value, p-value): The chi-square value found and a p-value
    for the null hypothesis.

    Notes
    -----
    This implementation does not do any special treatment for values with
    small numbers of occurrences.
    '''
    all_freqs, expected_freqs = _poisson_inputs(data)
    chisq = stats.chisquare(all_freqs, expected_freqs)[0]
    # one dof lost to the totals constraint, one to the estimated lambda
    pval = stats.chisqprob(chisq, len(all_freqs) - 2)
    return chisq, pval
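# The helper _poisson_inputs is not shown above.  A hypothetical sketch of
# what such a helper might compute: observed counts for each value 0..max and
# the Poisson-expected counts, using the sample mean as the lambda estimate.
import numpy as np
from scipy.stats import poisson

def poisson_inputs(data):
    data = np.asarray(data)
    observed = np.bincount(data).astype(float)  # counts of 0 .. max(data)
    values = np.arange(len(observed))
    expected = poisson.pmf(values, data.mean()) * len(data)
    return observed, expected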
def chisquare(obs, exp=None):
    """Compute the chisquare value of a contingency table with arbitrary
    dimensions.

    If no expected frequencies are supplied, the total N is assumed to be
    equally distributed across all cells.

    Returns: chisquare-stats, associated p-value (upper tail)
    """
    obs = N.array(obs)

    # get total number of observations
    nobs = N.sum(obs)

    # if no expected values are supplied assume equal distribution
    if exp is None:
        exp = N.ones(obs.shape) * nobs / N.prod(obs.shape)

    # make sure to have floating point data
    exp = exp.astype(float)

    # compute chisquare value
    chisq = N.sum((obs - exp) ** 2 / exp)

    # return chisq and probability (upper tail)
    return chisq, stats.chisqprob(chisq, N.prod(obs.shape) - 1)
def computeContingencyTablePValue(*observedTuples):
    if len(observedTuples) == 0:
        return None

    rowSums = []
    for row in observedTuples:
        rowSums.append(float(sum(row)))

    columnSums = []
    for i in range(len(observedTuples[0])):
        columnSum = 0.0
        for row in observedTuples:
            columnSum += row[i]
        columnSums.append(float(columnSum))

    grandTotal = float(sum(rowSums))

    observedTestStatistic = 0.0
    for i in range(len(observedTuples)):
        for j in range(len(columnSums)):
            expectedValue = (rowSums[i] / grandTotal) * (columnSums[j] / grandTotal) * grandTotal
            observedValue = float(observedTuples[i][j])
            observedTestStatistic += ((observedValue - expectedValue) ** 2) / expectedValue

    degreesFreedom = (len(columnSums) - 1) * (len(rowSums) - 1)

    from scipy.stats import chisqprob
    return chisqprob(observedTestStatistic, degreesFreedom)
def chisquare(obs, exp='uniform'):
    """Compute the chisquare value of a contingency table with arbitrary
    dimensions.

    Parameters
    ----------
    obs : array
      Observations matrix
    exp : ('uniform', 'indep_rows', 'indep_cols') or array, optional
      Matrix of expected values of the same size as `obs`.  If no array is
      given, then 'uniform' evenly distributes all observations.  In the
      'indep_rows' case the contingency table takes into account frequencies
      relative across different columns, so, if the contingency table is
      predictions vs targets, it would account for dis-balance among
      different targets.  Although 'uniform' is the default, for confusion
      matrices 'indep_rows' is preferable.

    Returns
    -------
    tuple
      chisquare-stats, associated p-value (upper tail)
    """
    obs = np.array(obs)

    # get total number of observations
    nobs = np.sum(obs)

    # if no expected values are supplied assume equal distribution
    if not isinstance(exp, np.ndarray):
        ones = np.ones(obs.shape, dtype=float)
        if exp == 'indep_rows':
            # multiply each column
            exp = np.sum(obs, axis=0)[None, :] * ones / obs.shape[0]
        elif exp == 'indep_cols':
            # multiply each row
            exp = np.sum(obs, axis=1)[:, None] * ones / obs.shape[1]
        elif exp == 'uniform':
            # just evenly distribute
            exp = nobs * np.ones(obs.shape, dtype=float) / np.prod(obs.shape)
        else:
            raise ValueError(
                "Unknown specification of expected values exp=%r" % (exp,))
    else:
        assert exp.shape == obs.shape

    # make sure to have floating point data
    exp = exp.astype(float)

    # compute chisquare value
    exp_zeros = exp == 0
    exp_nonzeros = np.logical_not(exp_zeros)
    if np.sum(exp_zeros) != 0 and (obs[exp_zeros] != 0).any():
        raise ValueError("chisquare: Expected values have 0-values, but there"
                         " are actual observations -- chi^2 cannot be computed")

    chisq = np.sum(((obs - exp) ** 2)[exp_nonzeros] / exp[exp_nonzeros])

    # return chisq and probability (upper tail), taking only the elements
    # with something expected
    return chisq, st.chisqprob(chisq, np.sum(exp_nonzeros) - 1)
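# Hypothetical usage sketch for the chisquare variant above, on a made-up 2x2
# confusion-style matrix (assumes the snippet's np/st imports are in scope).
# Under exp='uniform' every cell expects nobs/4 = 15; under exp='indep_rows'
# expectations follow the column totals, the recommended choice for
# confusion matrices.
obs = [[10, 5],
       [30, 15]]
print(chisquare(obs))                    # uniform expectations
print(chisquare(obs, exp='indep_rows'))  # column-marginal expectations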
def get_p_value_pearson_chi_squared(contingency_table):
    (n11, n12, n21, n22) = contingency_table
    n = n11 + n12 + n21 + n22
    if n == 0:
        raise ValueError("The contingency table is empty")

    n_1_plus = float(n11 + n12)
    n_2_plus = float(n21 + n22)
    n_plus_1 = float(n11 + n21)
    n_plus_2 = float(n12 + n22)

    if n == n_1_plus:
        return float(1)
    elif n == n_2_plus:
        return float(1)

    # eij = (n_i_plus)(n_plus_j)/n
    e11 = n_1_plus * n_plus_1 / n
    e12 = n_1_plus * n_plus_2 / n
    e21 = n_2_plus * n_plus_1 / n
    e22 = n_2_plus * n_plus_2 / n

    chi2 = (math.pow(n11 - e11, 2) / e11) + (math.pow(n12 - e12, 2) / e12) \
         + (math.pow(n21 - e21, 2) / e21) + (math.pow(n22 - e22, 2) / e22)
    p_value = chisqprob(chi2, 1)
    return p_value
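# Hypothetical usage sketch for get_p_value_pearson_chi_squared above; the 2x2
# counts (n11, n12, n21, n22) are made up, and the snippet's math/chisqprob
# imports are assumed to be in scope.  The expected cells are e11 = 12,
# e12 = 18, e21 = 28, e22 = 42, giving chi^2 ~= 0.794 on 1 dof and p ~= 0.37.
print(get_p_value_pearson_chi_squared((10, 20, 30, 40)))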
def chi_square(hist_1, hist_2):
    diff_1 = hist_1 - hist_2
    val = np.nansum(np.power(diff_1, 2) / (hist_1 + hist_2))
    ddof = len(hist_1)
    print('T = {}'.format(val))
    print('P(chi^2 > T) = {}'.format(chisqprob(val, ddof)))
def check_chisquare(f_obs, f_exp, ddof, axis, expected_chi2):
    # Use this only for arrays that have no masked values.
    f_obs = np.asarray(f_obs)
    if axis is None:
        num_obs = f_obs.size
    else:
        if axis == 'no':
            use_axis = 0
        else:
            use_axis = axis
        b = np.broadcast(f_obs, f_exp)
        num_obs = b.shape[use_axis]

    if axis == 'no':
        chi2, p = mstats.chisquare(f_obs, f_exp=f_exp, ddof=ddof)
    else:
        chi2, p = mstats.chisquare(f_obs, f_exp=f_exp, ddof=ddof, axis=axis)
    assert_array_equal(chi2, expected_chi2)

    ddof = np.asarray(ddof)
    expected_p = stats.chisqprob(expected_chi2, num_obs - 1 - ddof)
    assert_array_equal(p, expected_p)

    # Also compare to stats.chisquare
    if axis == 'no':
        stats_chisq, stats_p = stats.chisquare(f_obs, f_exp=f_exp, ddof=ddof)
    else:
        stats_chisq, stats_p = stats.chisquare(f_obs, f_exp=f_exp, ddof=ddof,
                                               axis=axis)
    assert_array_almost_equal(chi2, stats_chisq)
    assert_array_almost_equal(p, stats_p)
def plot_data_key(self, nm, conn, gnm=None):
    self.gather_points(nm, conn)
    self.gdat = [[n, self.datpts[k].value * self.yscale, self.datpts[k].err * self.yscale]
                 for (n, k) in enumerate(self.datkeys)
                 if self.datpts[k].value is not None]

    try:
        self.LF.fit(self.gdat, cols=(0, 1, 2), errorbarWeights=True)
        chi2 = self.LF.chisquared()
        ndf = self.LF.nu()
        statdat = {"mu": self.LF.coeffs[0], "rms": self.LF.rmsDeviation(),
                   "uncert": self.LF.coeffErr(0), "chi2": chi2, "ndf": ndf}
        if stats:
            statdat["prob"] = stats.chisqprob(statdat["chi2"], statdat["ndf"])
        else:
            statdat["prob"] = 0
        if gnm:
            gnm = gnm % statdat
            print(gnm)
        self.g.plot(graph.data.function("y(x)=%g" % self.LF.coeffs[0], title=None),
                    [graph.style.line(lineattrs=[self.ptcolor, style.linestyle.dashed])])
    except Exception:
        gnm = None

    self.g.axes['x'].max = self.gdat[-1][0]
    self.g.plot(graph.data.points(self.gdat, x=1, y=2, dy=3, title=gnm),
                [graph.style.errorbar(errorbarattrs=[self.ptcolor]),
                 graph.style.symbol(self.ptsymb, size=0.15,
                                    symbolattrs=[self.ptcolor])])
def walds_test(profile1, profile2):
    """Calculate the compatibility of two statistically independent
    measurements using a normal approximation (Wald's method).  This
    assumes that the log-likelihood space is approximately elliptical.

    Parameters
    ----------
    profile1 : (x, y, llh) for measurement 1
    profile2 : (x, y, llh) for measurement 2
    """
    from scipy.stats import chisqprob
    from scipy.special import erfinv

    bestfits, covariances = [], []
    for x, y, llhs in [profile1, profile2]:
        idx_min = np.unravel_index(llhs.argmin(), llhs.shape)
        bestfit = x[idx_min[1]], y[idx_min[0]]
        bestfits.append(bestfit)

        covariance = estimate_cov_from_contour(x, y, llhs, bestfit)
        covariances.append(covariance)

    diff = np.matrix(bestfits[0]) - np.matrix(bestfits[1])
    cov_inv = np.linalg.inv(covariances[0] + covariances[1])

    chi2 = diff * cov_inv * diff.transpose()
    ndof = 2
    pvalue = chisqprob(chi2, ndof)
    nsigma = erfinv(1 - pvalue) * np.sqrt(2)  # 2-sided significance
    return (chi2, ndof, pvalue, nsigma)
def likelihood_ratio_test(ll_min: float, ll_max: float,
                          dof_min: int, dof_max: int) -> (float, float):
    """
    Assesses the goodness of fit of two competing statistical models based
    on the ratio of their likelihoods.

    Parameters
    ----------
    ll_min : float
        Log-likelihood of the less complex model.
    ll_max : float
        Log-likelihood of the more complex model.
    dof_min : int
        Degrees of freedom of the less complex model.
    dof_max : int
        Degrees of freedom of the more complex model.

    Returns
    -------
    (float, float)
        lr: Likelihood ratio.
        p: p value.
    """
    lr = 2 * (ll_max - ll_min)
    delta_dof = dof_max - dof_min
    p = stats.chisqprob(lr, delta_dof)
    return (lr, p)
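# Hypothetical usage sketch for likelihood_ratio_test above, with made-up
# log-likelihoods: a 3-parameter null model against a 5-parameter alternative
# gives lr = 2 * (-115.1 - (-120.3)) = 10.4 on 2 dof.  (On SciPy >= 1.0 this
# relies on a chisqprob shim such as the one near the top of this section.)
lr, p = likelihood_ratio_test(ll_min=-120.3, ll_max=-115.1,
                              dof_min=3, dof_max=5)
print(lr, p)  # p ~= 0.0055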
def englishness(s):
    if s is None:
        return 0
    from scipy.stats import chisqprob
    # uses a chi-square algorithm to match the relative character frequencies
    # in the test string to those of real English
    score = 0
    s = s.lower()
    for c in EXTRA_CHARS:
        s = s.replace(c, '')  # completely ignore characters in EXTRA_CHARS
    frequency = defaultdict(float)
    ignored = 0  # ignore, but only for purpose of chisquare computation
    length = len(s)
    for i in s:  # analyze each character
        if i not in string.printable:  # non-printables are bad
            score += 2
            ignored += 1
        elif i in string.digits:  # digits aren't that bad
            score += 0.5
            ignored += 1
        elif i not in string.ascii_lowercase and i not in ' ':  # special chars are eh
            score += 1
            ignored += 1
        else:
            frequency[i] += 1
    # analyze alphabetic frequencies
    for i in frequency:
        freq = frequency[i] / (length - ignored)
        # Chi square
        score += pow((freq - CHAR_FREQ[i] / 100), 2) / (CHAR_FREQ[i] / 100)
    if not score:
        return 0
    return chisqprob(score, 1) * 100  # return probability
def get_most_likely_cn(combo, cn_lik, pval_cutoff):
    '''
    Use the most likely phi state, unless p < cutoff when compared to the
    most likely clonal (phi = 1) case (log likelihood ratio test); in that
    case, pick the CN state with the highest clonal likelihood.
    '''
    cn_lik_phi, cn_lik_clonal = cn_lik
    ll_phi, ll_clonal = cn_lik_phi[1], cn_lik_clonal[1]
    empty_result = [float('nan'), float('nan'), float('nan'), float('nan')]

    if len(combo) == 0:
        return empty_result
    elif len(combo) == 1:
        return combo[0]
    elif np.all(np.isnan(ll_phi)) and np.all(np.isnan(ll_clonal)):
        return empty_result
    elif np.all(np.isnan(ll_phi)):
        return combo[index_of_max(ll_clonal)]
    elif np.all(ll_phi == ll_clonal) or pval_cutoff == 0:
        return combo[index_of_max(ll_phi)]

    # log likelihood ratio test; null hypothesis = likelihood under phi
    # use clonal if best clonal solution significantly better than worst phi solution
    # LLR = 2 * (np.nanmax(ll_clonal) - np.nanmax(ll_phi))
    LLR = 2 * (np.nanmax(ll_clonal) - np.nanmin(ll_phi))
    p_val = stats.chisqprob(LLR, 1) if not np.isnan(LLR) else 1

    if p_val < pval_cutoff:
        return combo[index_of_max(ll_clonal)]
    else:
        return combo[index_of_max(ll_phi)]
def FisherMethodPvals(pvalues_array):
    # (adapted from code by Arie Shaus, Nov 2016)
    pvalues_array = np.array(pvalues_array)
    k = len(pvalues_array)
    z = -2 * sum(np.log(pvalues_array))
    combined_Pval = chisqprob(z, 2 * k)
    return combined_Pval
def wald_test(betas, r, q, vm):
    '''
    Chi sq. Wald statistic to test for restriction of coefficients.
    Implementation following Greene [Greene2003]_ eq. (17-24), p. 488

    ...

    Parameters
    ==========
    betas   : array
              kx1 array with coefficient estimates
    r       : array
              Array of dimension Rxk (R being number of restrictions) with
              the constraint setup.
    q       : array
              Rx1 array with constants in the constraint setup.  See
              Greene [Greene2003]_ for reference.
    vm      : array
              kxk variance-covariance matrix of coefficient estimates

    Returns
    =======
    w       : float
              Wald statistic
    pvalue  : float
              P value for the Wald statistic, calculated from a Chi sq.
              distribution with R degrees of freedom
    '''
    rbq = np.dot(r, betas) - q
    rvri = la.inv(np.dot(r, np.dot(vm, r.T)))
    w = np.dot(rbq.T, np.dot(rvri, rbq))[0][0]
    df = r.shape[0]
    pvalue = chisqprob(w, df)
    return w, pvalue
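# Hypothetical usage sketch for wald_test above, with made-up estimates and a
# diagonal variance-covariance matrix (assumes the snippet's np, la and
# chisqprob imports are in scope).  A single restriction H0: beta_2 = 0 gives
# w = 0.2**2 / 0.01 = 4.0 on R = 1 dof, so p ~= 0.0455.
betas = np.array([[1.0], [0.5], [0.2]])  # kx1 coefficient estimates
r = np.array([[0.0, 0.0, 1.0]])          # Rxk restriction matrix
q = np.array([[0.0]])                    # Rx1 constants
vm = np.eye(3) * 0.01                    # kxk covariance of the estimates
print(wald_test(betas, r, q, vm))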
def check_sample_var(sv, n, popvar):
    # two-sided chisquare test for sample variance equal to hypothesized variance;
    # the statistic is (n-1)*s^2 / sigma0^2
    df = n - 1
    chi2 = (n - 1) * sv / float(popvar)
    pval = stats.chisqprob(chi2, df) * 2
    npt.assert_(pval > 0.01,
                'var fail, t, pval = %f, %f, v, sv = %f, %f' %
                (chi2, pval, popvar, sv))
def LR(self):
    if 'LR' not in self._cache:
        P = 1.0 * np.sum(self.y) / self.n
        LR = float(-2 * (self.n * (P * np.log(P) + (1 - P) * np.log(1 - P))
                         - self.logl))
        self._cache['LR'] = (LR, chisqprob(LR, self.k))
    return self._cache['LR']
def gtest(obs, exp, ddof=0):
    '''
    http://en.wikipedia.org/wiki/G-test

    test for goodness of fit to expected frequencies

    obs  - observed freqs
    exp  - expected freqs
    ddof - delta dof

    returns the G statistic and a p value

    based on https://gist.github.com/brentp/570896
    '''
    assert len(obs) == len(exp)
    assert 0.0 not in exp

    n = len(obs)
    g = 0.0
    for i in range(n):
        if obs[i] == 0.0:
            continue  # Oi * ln(Oi / Ei) == 0 if Oi == 0
        g += obs[i] * math.log(obs[i] / exp[i])
        if exp[i] < 5.0:
            sys.stderr.write("warning: expected value less than 5 in gtest\n")
    g *= 2.0
    return g, chisqprob(g, n - 1 - ddof)
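# Hypothetical usage sketch for gtest above, with made-up frequencies (assumes
# the snippet's math/sys/chisqprob imports are in scope):
# G = 2 * sum(Oi * ln(Oi / Ei)), compared against chi-squared on
# n - 1 - ddof dof, here 2.
obs = [10, 20, 30]
exp = [20, 20, 20]
print(gtest(obs, exp))  # G ~= 10.47, p ~= 0.0053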
def lrtest(llmin, llmax):
    """
    Likelihood Ratio Test (LRT) by Joanna Diong
    https://scientificallysound.org/2017/08/24/the-likelihood-ratio-test-relevance-and-application/

    Example:

        # import example dataset
        data = sm.datasets.get_rdataset("dietox", "geepack").data

        # fit time only to pig weight
        md = smf.mixedlm("Weight ~ Time", data, groups=data["Pig"])
        mdf = md.fit(reml=False)
        print(mdf.summary())
        llf = mdf.llf

        # fit time and litter to pig weight
        mdlitter = smf.mixedlm("Weight ~ Time + Litter", data, groups=data["Pig"])
        mdflitter = mdlitter.fit(reml=False)
        print(mdflitter.summary())
        llflitter = mdflitter.llf

        lr, p = lrtest(llf, llflitter)
        print('LR test, p value: {:.2f}, {:.4f}'.format(lr, p))

    :param llmin: Log-likelihood of null model (the model without the
        variable we are considering to add).
    :param llmax: Log-likelihood of the alternative model (the model with
        the extra variable).
    :return: lr, p
        * lr: likelihood ratio
        * p: p-value to reject the hypothesis that the alternative model
          fits the data no better than the null model.
    """
    lr = 2 * (llmax - llmin)
    p = stats.chisqprob(lr, 1)  # llmax has 1 dof more than llmin
    return lr, p
def p_true(self):
    self.truelike = self.M_true.likelihood(self.M_true.simData)
    self.findmle(self.tau01_true, self.tau12_true, self.N_true)
    self.CS = 2.0 * (self.ml - self.truelike)  # plus log-likelihood
    numparam = 2
    self.pValue = stats.chisqprob(self.CS, numparam)
    return self.pValue
def test_for_pihm(data, guess=[10, -5], crof_params=None):
    """
    Test a dataset for parasite induced host mortality

    Parameters
    ----------
    data : array
        Hosts with given parasite loads

    Returns
    -------
    : tuple
        chi-squared value, p-value, full nll, reduced nll
    """
    # Get full nll
    params = likelihood_method(data, crof_params=crof_params, guess=guess)
    full_nll = likefxn1(params, data)

    if crof_params:
        mu, k = np.array(crof_params)[1:]
        red_nll = comp.nll(data, mod.nbinom(mu, k))
    else:
        mle_fit = mod.nbinom.fit_mle(data, k_array=np.linspace(.1, 2, 100))
        red_nll = comp.nll(data, mod.nbinom(*mle_fit))

    chi_sq = 2 * (-full_nll - (-red_nll))
    prob = chisqprob(chi_sq, 2)

    return chi_sq, prob, full_nll, red_nll
def __init__(self, statmatch):
    """
    Populate a pandas data frame and pass it forward as BalanceStatistics.

    Generally, operations are vectorized where possible and each method
    works on several covariates from a statistical matching routine at a
    time.

    :param statmatch: StatisticalMatching instance that has been fitted
    :return: BalanceStatistics instance
    """
    # Could be replaced with an ordered dictionary
    columns = ['unmatched_treated_mean', 'unmatched_control_mean',
               'unmatched_bias', 'unmatched_t_statistic',
               'unmatched_p_value', 'matched_treated_mean',
               'matched_control_mean', 'matched_bias',
               'matched_t_statistic', 'matched_p_value', 'bias_reduction']

    data = {'unmatched_treated_mean': self._unmatched_treated_mean(statmatch),
            'unmatched_control_mean': self._unmatched_control_mean(statmatch),
            'unmatched_bias': self._unmatched_bias(statmatch),
            'unmatched_t_statistic': self._unmatched_t_statistic(statmatch),
            'unmatched_p_value': self._unmatched_p_value(statmatch),
            'matched_treated_mean': self._matched_treated_mean(statmatch),
            'matched_control_mean': self._matched_control_mean(statmatch),
            'matched_bias': self._matched_bias(statmatch),
            'matched_t_statistic': self._matched_t_statistic(statmatch),
            'matched_p_value': self._matched_p_value(statmatch),
            'bias_reduction': self._bias_reduction(statmatch)}

    # dataframe with columns defined above
    super(BalanceStatistics, self).__init__(data, index=statmatch.names,
                                            columns=columns)

    # Whenever it becomes a problem that we have three copies of how to run
    # regression, we can refactor this into another class
    fitted_reg = self._fit_unmatched_regression(statmatch)
    self.unmatched_prsquared = 1 - fitted_reg.llf / fitted_reg.llnull
    self.unmatched_llr = -2 * (fitted_reg.llnull - fitted_reg.llf)
    self.unmatched_llr_pvalue = chisqprob(self.unmatched_llr,
                                          fitted_reg.df_model)

    fitted_reg = self._fit_matched_regression(statmatch)
    self.matched_prsquared = 1 - fitted_reg.llf / fitted_reg.llnull
    self.matched_llr = -2 * (fitted_reg.llnull - fitted_reg.llf)
    self.matched_llr_pvalue = chisqprob(self.matched_llr,
                                        fitted_reg.df_model)
def hessTest(self, L):
    matrixList = L[:]
    for xy in matrixList:
        xy = numpy.matrix(xy).T
        xy[0] -= self.MLE[0]
        xy[1] -= self.MLE[1]
        CS = xy.T * self.H * xy
        print('p-value:', stats.chisqprob(CS, 2))
def fisher_combine(pvals):
    """Combine p-values into one with Fisher's method, skipping "NA" entries."""
    if all(p == "NA" for p in pvals):
        return np.nan
    pvals = [p for p in pvals if p != "NA"]
    if len(pvals) == 1:
        return pvals[0]
    s = -2 * np.sum(np.log(pvals))
    return chisqprob(s, 2 * len(pvals))
def likelihood_ratio_test(counts, model1, model2):
    # see formula <http://en.wikipedia.org/wiki/Likelihood-ratio_test>
    print('Test %s and %s' % (model1.name, model2.name))
    D = -2 * (model1.lnL - model2.lnL)
    df = model2.df - model1.df
    p_value = chisqprob(D, df)
    print('D = %.1f, df = %d, P-value = %.2g' % (D, df, p_value))
def chisquare(obs, exp="uniform"): """Compute the chisquare value of a contingency table with arbitrary dimensions. Parameters ---------- obs : array Observations matrix exp : ('uniform', 'indep_rows') or array, optional Matrix of expected values of the same size as `obs`. If no array is given, then for 'uniform' -- evenly distributes all observations. In 'indep_rows' case contingency table takes into account frequencies relative across different columns, so, if the contingency table is predictions vs targets, it would account for dis-balance among different targets. Although 'uniform' is the default, for confusion matrices 'indep_rows' is preferable. Returns ------- tuple chisquare-stats, associated p-value (upper tail) """ obs = np.array(obs) # get total number of observations nobs = np.sum(obs) # if no expected value are supplied assume equal distribution if not isinstance(exp, np.ndarray): ones = np.ones(obs.shape, dtype=float) if exp == "indep_rows": # multiply each column exp = np.sum(obs, axis=0)[None, :] * ones / obs.shape[0] elif exp == "indep_cols": # multiply each row exp = np.sum(obs, axis=1)[:, None] * ones / obs.shape[1] elif exp == "uniform": # just evenly distribute exp = nobs * np.ones(obs.shape, dtype=float) / np.prod(obs.shape) else: raise ValueError, "Unknown specification of expected values exp=%r" % (exp,) else: assert exp.shape == obs.shape # make sure to have floating point data exp = exp.astype(float) # compute chisquare value exp_zeros = exp == 0 exp_nonzeros = np.logical_not(exp_zeros) if np.sum(exp_zeros) != 0 and (obs[exp_zeros] != 0).any(): raise ValueError, "chisquare: Expected values have 0-values, but there are actual" " observations -- chi^2 cannot be computed" chisq = np.sum(((obs - exp) ** 2)[exp_nonzeros] / exp[exp_nonzeros]) # return chisq and probability (upper tail) # taking only the elements with something expected return chisq, st.chisqprob(chisq, np.sum(exp_nonzeros) - 1)
def hessEval(r, theta, nbf, alpha=0.0):
    global H, MLE
    x = r * math.cos(theta)
    y = r * math.sin(theta)
    # print 'x', x, 'y', y
    # HI = numpy.matrix(H).I
    xy = numpy.matrix([[x], [y]])
    CS = xy.T * H * xy
    return stats.chisqprob(CS, 2) - alpha
def chi2(BET, feature_1, feature_2):
    BET.reset_index(drop=True, inplace=True)
    x = BET.to_dict(orient='list')
    keys = list(x.keys())
    obs_freq = {}
    exp_freq = {}
    sum_exp_freq_vertical = np.zeros(len(feature_2))
    chi2 = 0

    # observed frequencies and column totals
    for i in range(len(feature_1)):
        obs_freq[feature_1[i]] = []
        for j in range(len(feature_2)):
            col1 = feature_1[i]
            col2 = feature_2[j]
            sumx = x[col1][keys.index(col2)][10]
            obs_freq[feature_1[i]].append(sumx)
        sum_exp_freq_vertical = sum_exp_freq_vertical + np.array(obs_freq[feature_1[i]])

    total_in_contingency = sum(sum_exp_freq_vertical)

    # expected frequencies from row and column totals
    for i in range(len(feature_1)):
        exp_freq[feature_1[i]] = []
        sum_exp_freq_horizontal = sum(obs_freq[feature_1[i]])
        for j in range(len(feature_2)):
            e = (sum_exp_freq_horizontal * sum_exp_freq_vertical[j]) / total_in_contingency
            exp_freq[feature_1[i]].append(e)

    # Pearson statistic
    for i in range(len(feature_1)):
        for j in range(len(feature_2)):
            chi2 = chi2 + ((obs_freq[feature_1[i]][j] - exp_freq[feature_1[i]][j]) ** 2) / exp_freq[feature_1[i]][j]

    df = (len(feature_1) - 1) * (len(feature_2) - 1)
    print('chi2: ' + str(chi2))
    print('df: ' + str(df))
    print('chisqprob: ' + str(chisqprob(chi2, df)))
    return chisqprob(chi2, df)
def calculateCombinedFisher(significanceValuesList):
    # X^2_2k ~ -2 * sum(ln(p_i))
    accumulatedValue = 0
    for significanceValues in significanceValuesList:
        accumulatedValue += log(significanceValues[2])
    accumulatedValue = accumulatedValue * -2
    return chisqprob(accumulatedValue, 2 * len(significanceValuesList))
def chi_square_shape(hist_1, hist_2):
    n1 = np.sum(hist_1)
    n2 = np.sum(hist_2)
    diff_1 = (hist_1 / n1) - (hist_2 / n2)
    sum_1 = (hist_1 / (n1 * n1)) + (hist_2 / (n2 * n2))
    val = np.nansum(np.power(diff_1, 2) / sum_1)
    ddof = len(hist_1) - 1
    print('T = {}'.format(val))
    print('P(chi^2 > T) = {}'.format(chisqprob(val, ddof)))
def combine_fisher(self, pvalue1, pvalue2):
    """
    Combine two p-values using Fisher's method.

    See https://en.wikipedia.org/wiki/Fisher%27s_method for more details
    """
    if pvalue1 == 0.0 or pvalue2 == 0.0:
        return 0.0
    chi = -2.0 * (math.log(pvalue1) + math.log(pvalue2))
    p_out = chisqprob(chi, 4)  # 2k = 4 degrees of freedom for k = 2 p-values
    return p_out
def LRT(ll1, ll2, df):
    """
    Calculates likelihood ratio test between two models.

    :params ll1, ll2: log-likelihoods of the two models studied
    :param df: degrees of freedom of difference between the two models
    """
    LR = abs(2 * (ll1 - ll2))
    # stats.chisqprob was removed from scipy; chi2.sf is the equivalent
    stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)
    p = stats.chisqprob(LR, df)
    return (LR, p)
def chisq_2sam(f_obs1, f_obs2):
    """
    Calculates a two-sample chi square test.

    The two-sample chi square test tests the null hypothesis that the two
    categorical data samples have the same frequencies.

    Parameters
    ----------
    f_obs1, f_obs2 : two arrays with observed frequencies in each category.
        The number of categories must be the same.

    Returns
    -------
    chisquare statistic : float
        The chisquare test statistic
    p : float
        The p-value of the test.

    Notes
    -----
    If the number of observations is the same across the two samples, then
    the number of degrees of freedom is equal to the number of bins minus
    one (due to the additional constraint on the sample size), else it is
    equal to the number of bins.  The same observations on the size of the
    sample in the one-way chi squared test (see scipy.stats.chisquare)
    apply also for the case with two samples.

    Examples
    --------
    >>> chisq_2sam(np.ones(10), np.ones(10))  # same frequencies
    (0.0, 1.0)
    >>> chi2, pval = chisq_2sam([100, 0, 0], [0, 0, 100])
    >>> print(chi2)
    200.0
    >>> print(pval)
    2.08848758376e-45
    """
    if len(f_obs1) != len(f_obs2):
        raise ValueError('expecting same number of bins')
    f_obs1, f_obs2 = np.asarray(f_obs1, dtype=int), np.asarray(f_obs2, dtype=int)
    s1, s2 = np.sum(f_obs1), np.sum(f_obs2)
    if s1 == s2:
        ksntrns = 1
    else:
        ksntrns = 0
    idx = (f_obs1 + f_obs2) == 0.
    ksntrns += np.sum(idx.astype(int))
    ddof = len(f_obs1) - ksntrns
    ratio1, ratio2 = map(np.sqrt, [s2 / s1, s1 / s2])
    chisq = ((f_obs1 * ratio1) - (f_obs2 * ratio2)) ** 2 / (f_obs1 + f_obs2)
    chisq = np.sum(chisq[~idx])
    return chisq, chisqprob(chisq, ddof)
def Fisher_combination_Pvals(pvalues_array):
    pvalues_array = np.array(pvalues_array)
    z = 0
    for pval in pvalues_array:
        if pval > 1.e-20:
            z += -2 * np.log(pval)
        else:
            z += -2 * np.log(1.e-20)
    k = len(pvalues_array)
    combined_Pval = chisqprob(z, 2 * k)
    return combined_Pval