Beispiel #1
0
def test_chisquare_masked_arrays():
    """Check mstats.chisquare on masked input, per axis and with axis=None."""
    # The other tests were taken from the tests for stats.chisquare, so
    # they don't test the function with masked arrays.  Here masked arrays
    # are tested.
    obs = np.array([[8, 8, 16, 32, -1], [-1, -1, 3, 4, 5]]).T
    mask = np.array([[0, 0, 0, 0, 1], [1, 1, 0, 0, 0]]).T
    mobs = ma.masked_array(obs, mask)
    expected_chisq = np.array([24.0, 0.5])

    # stats.chisqprob no longer exists in SciPy; the survival function of
    # the chi-square distribution computes the same tail probability.
    chi2_sf = stats.distributions.chi2.sf

    chisq, p = mstats.chisquare(mobs)
    assert_array_equal(chisq, expected_chisq)
    # Only unmasked entries count towards the degrees of freedom.
    assert_array_almost_equal(
        p, chi2_sf(expected_chisq, mobs.count(axis=0) - 1))

    chisq, p = mstats.chisquare(mobs.T, axis=1)
    assert_array_equal(chisq, expected_chisq)
    assert_array_almost_equal(
        p, chi2_sf(expected_chisq, mobs.T.count(axis=1) - 1))

    # When axis=None, the two values should have type np.float64.
    chisq, p = mstats.chisquare([1, 2, 3], axis=None)
    assert_(isinstance(chisq, np.float64))
    assert_(isinstance(p, np.float64))
    assert_equal(chisq, 1.0)
    assert_almost_equal(p, chi2_sf(1.0, 2))
Beispiel #2
0
    def corrolateAmps(self):
        """Return the index of the simulated row that best matches the experiment.

        For every simulated row (all but the last two), the experimental
        amplitude and background curves are rescaled so their maxima match
        the simulated ones, and ``st.chisquare`` is evaluated for both.
        The returned index is the row whose first stored value (the
        amplitude chi-square statistic) is smallest.
        """
        self.adjustPhase(self.exp_phis, self.sim_phis[0, 0:359])

        results = []
        # TODO: fix sim data to avoid using "-2" to ignore the 2 extra data rows
        for row in range(len(self.sim_amps) - 2):
            # Scale the experimental background to the simulated peak value.
            sim_bg = self.sim_bgs[row, 0:359]
            bg_scale = sim_bg.max() / self.exp_bgs.max()
            scaled_exp_bg = bg_scale * self.exp_bgs

            # Scale the experimental amplitude to the simulated peak value.
            sim_amp = self.sim_amps[row, 0:359]
            amp_scale = sim_amp.max() / self.exp_amps.max()
            scaled_exp_amp = amp_scale * self.exp_amps

            # NOTE(review): the simulated curve is passed as f_obs and the
            # experimental one as f_exp -- confirm this order is intended.
            amp_result = st.chisquare(sim_amp, scaled_exp_amp)
            bg_result = st.chisquare(sim_bg, scaled_exp_bg)

            # TODO: fix -- "+" concatenates the two result tuples rather
            # than adding the statistics, so each row holds
            # (amp_stat, amp_p, bg_stat, bg_p).
            results.append(amp_result + bg_result)

        # Column 0 is the amplitude chi-square; its argmin is the angle index.
        return np.array(results)[:, 0].argmin()
Beispiel #3
0
def check_chisquare(f_obs, f_exp, ddof, axis, expected_chi2):
    """Assert that mstats.chisquare gives the expected statistic and p-value.

    Parameters
    ----------
    f_obs, f_exp, ddof
        Forwarded to ``mstats.chisquare`` and ``stats.chisquare``.
    axis : int, None or 'no'
        Axis to forward; the string ``'no'`` means "do not pass ``axis``",
        which exercises the functions' default.
    expected_chi2
        Statistic the call must reproduce exactly.
    """
    # Use this only for arrays that have no masked values.
    f_obs = np.asarray(f_obs)
    if axis is None:
        num_obs = f_obs.size
    else:
        use_axis = 0 if axis == 'no' else axis
        num_obs = np.broadcast(f_obs, f_exp).shape[use_axis]

    axis_kwargs = {} if axis == 'no' else {'axis': axis}

    chi2, p = mstats.chisquare(f_obs, f_exp=f_exp, ddof=ddof, **axis_kwargs)
    assert_array_equal(chi2, expected_chi2)

    ddof = np.asarray(ddof)
    # stats.chisqprob no longer exists in SciPy; chi2.sf is the same
    # upper-tail probability.
    expected_p = stats.distributions.chi2.sf(expected_chi2,
                                             num_obs - 1 - ddof)
    assert_array_equal(p, expected_p)

    # Also compare to stats.chisquare
    stats_chisq, stats_p = stats.chisquare(f_obs, f_exp=f_exp, ddof=ddof,
                                           **axis_kwargs)
    assert_array_almost_equal(chi2, stats_chisq)
    assert_array_almost_equal(p, stats_p)
def check_chisquare(f_obs, f_exp, ddof, axis, expected_chi2):
    """Verify mstats.chisquare against a known statistic and stats.chisquare.

    ``axis`` may be an int, ``None``, or the string ``'no'``; ``'no'``
    means the ``axis`` keyword is omitted so the default is exercised.
    ``expected_chi2`` must be matched exactly; the p-value is checked
    against the chi-square survival function with
    ``num_obs - 1 - ddof`` degrees of freedom.
    """
    # Use this only for arrays that have no masked values.
    f_obs = np.asarray(f_obs)
    omit_axis = (axis == 'no')

    if axis is None:
        num_obs = f_obs.size
    else:
        b = np.broadcast(f_obs, f_exp)
        num_obs = b.shape[0 if omit_axis else axis]

    def _call(func, ddof_value):
        # Forward ``axis`` only when the caller supplied a real value.
        if omit_axis:
            return func(f_obs, f_exp=f_exp, ddof=ddof_value)
        return func(f_obs, f_exp=f_exp, ddof=ddof_value, axis=axis)

    chi2, p = _call(mstats.chisquare, ddof)
    assert_array_equal(chi2, expected_chi2)

    ddof = np.asarray(ddof)
    # Replacement for stats.chisqprob, which is gone from SciPy.
    expected_p = stats.distributions.chi2.sf(expected_chi2,
                                             num_obs - 1 - ddof)
    assert_array_equal(p, expected_p)

    # Also compare to stats.chisquare
    stats_chisq, stats_p = _call(stats.chisquare, ddof)
    assert_array_almost_equal(chi2, stats_chisq)
    assert_array_almost_equal(p, stats_p)
    def corrolateAmps(self):
        """Find the simulated row with the smallest amplitude chi-square.

        The experimental amplitude/background curves are rescaled to the
        peak of each simulated row before ``st.chisquare`` is applied.
        Returns the ``argmin`` over the first stored column, i.e. the
        amplitude statistic.
        """
        self.adjustPhase(self.exp_phis, self.sim_phis[0, 0:359])

        def _rescale(observed, expected):
            # Match the observed curve's peak to the expected curve's peak.
            peak_expected = expected.max()
            peak_observed = observed.max()
            return (peak_expected / peak_observed) * observed

        per_row = []
        # TODO: fix sim data to avoid using "-2" to ignore the 2 extra data rows
        n_rows = len(self.sim_amps) - 2
        for i in range(n_rows):
            bg_expected = self.sim_bgs[i, 0:359]
            bg_observed = _rescale(self.exp_bgs, bg_expected)

            amp_expected = self.sim_amps[i, 0:359]
            amp_observed = _rescale(self.exp_amps, amp_expected)

            # NOTE(review): expected is passed in the f_obs slot and observed
            # in the f_exp slot -- confirm against st.chisquare's signature.
            amp_chi = st.chisquare(amp_expected, amp_observed)
            bg_chi = st.chisquare(bg_expected, bg_observed)

            # TODO: fix -- tuple concatenation, not a numeric sum.
            per_row.append(amp_chi + bg_chi)

        table = np.array(per_row)
        # Location of the minimum amplitude chi-square (angle index).
        return table[:, 0].argmin()
def count_by_day_of_week(dates):
    """Summarise entries per weekday and test them against an expectation.

    ``dates`` is a series of dates.  Entries are first counted per distinct
    date, then grouped by ``weekday()`` (0-6).

    Returns a tuple ``(mean_per_day, p)``:

    * ``mean_per_day`` -- mean number of entries per date for each weekday,
      e.g.::

            0 123
            1 2
            2 8
            3 2
            4 322
            5 9
            6 1

    * ``p`` -- p-value of ``chisquare`` comparing the per-weekday totals
      with the frequencies from ``_get_adj_freqs(dates)`` rescaled to the
      same grand total.
    """
    # Careful: do not forget the zero days (translated from the original
    # German note; presumably dates with no entries at all -- TODO confirm).
    per_date = pd.DataFrame(dates.value_counts())
    per_date.columns = ['entries']
    per_date['day'] = per_date.index.map(lambda stamp: stamp.weekday())

    mean_per_day = per_date['entries'].groupby(per_date['day']).mean()
    per_weekday = per_date['entries'].groupby(per_date['day']).sum()

    # Chi-square inputs: observed hits next to the adjusted day frequencies.
    df = pd.DataFrame({'hits': per_weekday,
                       'dfreq': _get_adj_freqs(dates)})

    # Scale the day frequencies so they sum to the number of hits.
    hits_total = df.hits.sum()
    dfreq_total = df.dfreq.sum()
    df['adjusted_expectation'] = df.dfreq.map(
        lambda freq: freq * hits_total / dfreq_total)

    result = chisquare(df.hits, df.adjusted_expectation)
    return mean_per_day, result.pvalue
Beispiel #7
0
    def calculate_chi_square(self):
        """Run ``statistics.chisquare`` on ``self.observations``.

        The expected frequencies come from ``self.calculate_expected_values()``.
        The result of ``statistics.chisquare`` is returned unchanged
        (judging by the original variable name, a statistic/p-value pair).
        """
        f_exp = self.calculate_expected_values()
        return statistics.chisquare(self.observations, f_exp=f_exp)
Beispiel #8
0
def chisq_independence(col1, col2):
    """Chi-square test of independence between two categorical columns.

    Parameters
    ----------
    col1, col2
        Equal-length sequences / Series accepted by ``pd.crosstab``.

    Returns
    -------
    "TMC"
        When the average number of samples per table cell is <= 5.
    str
        The rounded p-value followed by ``"*"`` when any expected count is
        below 1 or at least 20% of the expected counts are below 5.
    float
        Otherwise, the p-value rounded to 3 decimals.
    """
    contingencyTable = pd.crosstab(col1, col2, margins=True)
    # The trailing "All" row/column hold the margins, not real categories.
    n_rows = contingencyTable.shape[0] - 1
    n_cols = contingencyTable.shape[1] - 1

    # float() keeps this a true division on Python 2 as well.
    if len(col1) / float(n_rows * n_cols) <= 5:
        return "TMC"

    # Expected count under independence = row total * column total / total.
    # Built as an outer product so no floats are written into the
    # integer-typed crosstab.
    row_totals = contingencyTable.iloc[:-1, -1].values.astype(float)
    col_totals = contingencyTable.iloc[-1, :-1].values.astype(float)
    total = float(contingencyTable.iloc[-1, -1])

    observed_frq = contingencyTable.iloc[:-1, :-1].values.ravel()
    expected_frq = (np.outer(row_totals, col_totals) / total).ravel()

    numless1 = int(np.count_nonzero(expected_frq < 1))
    perless5 = np.count_nonzero(expected_frq < 5) / float(expected_frq.size)

    # chisquare() assumes k - 1 - ddof degrees of freedom for k cells; this
    # ddof turns that into (rows - 1) * (cols - 1) for a two-way table.
    matrixadj = n_rows + n_cols - 2
    pval = np.round(
        chisquare(observed_frq, expected_frq, ddof=matrixadj)[1], 3)

    if numless1 > 0 or perless5 >= 0.2:
        return str(pval) + "*"
    return pval
Beispiel #9
0
    def chi2Sam(self, sam, baseF, debug=False, nfcn=-1):
        '''
        determine chi2 for sample given base function baseF
        Fit function specified by baseF to sample data to extract constant, then determine chisquare of fit

        sam   : iterable of (x, y) pairs
        baseF : stored on self.baseF before fitting
        debug : when true, print intermediate values
        nfcn  : key under which the fitted curve is stored in self.theFits;
                must be > 0 when self.plotFits is set, otherwise the
                process exits via sys.exit

        returns (chisq, ndf, pvalue) with ndf = len(x) - 1
        '''

        x = numpy.array([a for a, b in sam])
        y = numpy.array([b for a, b in sam])
        # Single-argument print(...) with %s formatting produces the same
        # text as the old Python 2 print statement and also runs on Python 3.
        if debug:
            print('adsorpMC.chi2Sam sam %s x %s y %s' % (sam, x, y))
        self.baseF = baseF

        # do the fit (the covariance estimate is not used)
        a0, _ = curve_fit(self.func1, x, y)
        if debug:
            print('adsorpMC.chis2Sam a0 %s' % (a0,))

        if self.plotFits:
            if nfcn <= 0:
                sys.exit('adsorpMC.chi2Sam ERROR nfcn=' + str(nfcn) +
                         ' should be >0')
            xf = numpy.linspace(0., max(self.RefX), 1000)  #self.Duration,100)
            yf = self.func1(xf, a0)
            self.theFits[nfcn] = [xf, yf]

        # evaluate chisquare
        observed = y
        expected = self.func1(x, a0)
        if debug:
            print('adsorpMC.chis2Sam observed %s' % (observed,))
        if debug:
            print('adsorpMC.chis2Sam expected %s' % (expected,))
        ndf = len(x) - 1
        # NOTE(review): ddof=-1 makes chisquare use len(x) degrees of freedom
        # for the p-value, while ndf reported below is len(x) - 1 -- confirm
        # which one is intended.
        chisq, pvalue = chisquare(observed, expected, -1)
        if debug:
            print('adsorpMC.chis2Sam chisq,pvalue,ndf %s %s %s'
                  % (chisq, pvalue, ndf))
        return chisq, ndf, pvalue
    def chisq_independence(self, col1, col2, verbose = False):
        """Chi-square test of independence between two categorical columns.

        Returns ``"TMC"`` when the average number of samples per table cell
        is <= 5.  Otherwise returns the p-value rounded to 3 decimals; it is
        returned as a string with a trailing ``"*"`` when any expected count
        is below 1 or at least 20% of the expected counts are below 5.

        With ``verbose`` set, the column names and the contingency table are
        printed (``col1`` and ``col2`` must then have a ``name`` attribute).
        """
        contingencyTable = pd.crosstab(col1, col2, margins=True)
        # The trailing "All" row/column hold the margins, not categories.
        n_rows = contingencyTable.shape[0] - 1
        n_cols = contingencyTable.shape[1] - 1

        # float() keeps this a true division on Python 2 as well.
        if len(col1) / float(n_rows * n_cols) <= 5:
            return "TMC"

        # Expected count = row total * column total / grand total, built as
        # an outer product so no floats are written into the integer-typed
        # crosstab.
        row_totals = contingencyTable.iloc[:-1, -1].values.astype(float)
        col_totals = contingencyTable.iloc[-1, :-1].values.astype(float)
        total = float(contingencyTable.iloc[-1, -1])

        if verbose:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('\n\nAnalysis of models: %s and %s' % (col1.name, col2.name))
            print('Contingency Table:')
            print(contingencyTable)

        observed_frq = contingencyTable.iloc[:-1, :-1].values.ravel()
        expected_frq = (np.outer(row_totals, col_totals) / total).ravel()

        numless1 = int(np.count_nonzero(expected_frq < 1))
        perless5 = np.count_nonzero(expected_frq < 5) / float(expected_frq.size)

        # chisquare() assumes k - 1 - ddof degrees of freedom for k cells;
        # this ddof turns that into (rows - 1) * (cols - 1).
        matrixadj = n_rows + n_cols - 2
        pval = np.round(
            chisquare(observed_frq, expected_frq, ddof=matrixadj)[1], 3)

        if numless1 > 0 or perless5 >= 0.2:
            return str(pval) + "*"
        return pval
Beispiel #11
0
Datei: main.py Projekt: zjgtan/zh
def kftest(df, column, label, tag):
    """Chi-square independence test between ``column`` and ``label``.

    Rows with a missing value in either column are dropped first.

    Parameters
    ----------
    df : pandas.DataFrame
    column, label : column names in ``df``; ``label`` must take the values
        0 and 1, because the summary indexes the counts by those two keys.
    tag : the value of ``column`` that the summary string reports on.

    Returns
    -------
    str
        ``"n(share),n0(share0),n1(share1),statistic,p_value"`` where ``n`` is
        the count of ``tag`` overall and ``n0`` / ``n1`` its count within
        label 0 / label 1.
    """
    from collections import Counter

    df_tmp = df.loc[:, [column, label]].dropna()

    col = dict(df_tmp[column].value_counts())
    lab = dict(df_tmp[label].value_counts())
    total = sum(lab.values())

    # One pass over the rows instead of rescanning the frame for every cell.
    pair_counts = Counter(zip(df_tmp[column], df_tmp[label]))

    f_obs = []
    f_exp = []
    obs_d = {}
    for i in col:
        for j in lab:
            obs = pair_counts[(i, j)]
            obs_d.setdefault(j, {})[i] = obs
            f_obs.append(obs)
            # Expected count under independence.
            f_exp.append(1. * lab[j] / total * col[i])

    # chisquare() uses k - 1 - ddof degrees of freedom for k cells; an
    # independence test needs (rows - 1) * (cols - 1).  The previous
    # ddof=len(f_obs) - 2 forced 1 degree of freedom for every table size,
    # which is only right for a 2x2 table.
    dof = (len(col) - 1) * (len(lab) - 1)
    statics, p_value = chisquare(f_obs, f_exp, ddof=len(f_obs) - 1 - dof)

    str1 = "%d(%f),%d(%f),%d(%f),%f,%f" % (
        col[tag], col[tag] * 1. / sum(col.values()),
        obs_d[0][tag],
        1. * obs_d[0][tag] / sum(obs_d[0].values()),
        obs_d[1][tag],
        1. * obs_d[1][tag] / sum(obs_d[1].values()),
        statics,
        p_value)
    return str1
Beispiel #12
0
Datei: main.py Projekt: zjgtan/zh
def kftest(df, column, label, tag):
    """Chi-square independence test between ``column`` and ``label``.

    Rows with a missing value in either column are dropped.  ``label`` must
    take the values 0 and 1 (the summary indexes the counts by those keys)
    and ``tag`` must be one of the values of ``column``.

    Returns the string
    ``"n(share),n0(share0),n1(share1),statistic,p_value"`` where ``n`` is the
    overall count of ``tag`` and ``n0`` / ``n1`` its count within label
    0 / label 1.
    """
    from collections import Counter

    df_tmp = df.loc[:, [column, label]]
    df_tmp = df_tmp.dropna()

    col = dict(df_tmp[column].value_counts())
    lab = dict(df_tmp[label].value_counts())
    n_total = sum(lab.values())

    # Count every (column value, label value) pair in a single pass.
    cell_counts = Counter(zip(df_tmp[column], df_tmp[label]))

    f_obs = []
    f_exp = []
    obs_d = {}
    for i in col:
        for j in lab:
            observed = cell_counts[(i, j)]
            obs_d.setdefault(j, {})
            obs_d[j][i] = observed
            f_obs.append(observed)
            # Expected count if column and label were independent.
            f_exp.append(1. * lab[j] / n_total * col[i])

    # Degrees of freedom of an r x c independence test are (r-1)*(c-1);
    # chisquare() uses k - 1 - ddof, so solve for ddof.  The former
    # ddof=len(f_obs) - 2 was only correct for 2x2 tables.
    dof = (len(col) - 1) * (len(lab) - 1)
    ddof = len(f_obs) - 1 - dof
    statics, p_value = chisquare(f_obs, f_exp, ddof=ddof)

    str1 = "%d(%f),%d(%f),%d(%f),%f,%f" % (
        col[tag], col[tag] * 1. / sum(col.values()), obs_d[0][tag],
        1. * obs_d[0][tag] / sum(obs_d[0].values()), obs_d[1][tag],
        1. * obs_d[1][tag] / sum(obs_d[1].values()), statics, p_value)
    return str1
def chisquare_test(observed0, expected0):
    """Return a p-value comparing ``observed0`` with ``expected0``.

    ``chisquare`` is used when every observed count exceeds 5; otherwise
    ``fisher_exact`` is applied to the two sequences stacked as table rows.
    In both cases element ``[1]`` of the result is returned.
    """
    observed = np.array(observed0)
    expected = np.array(expected0)
    if not min(observed0) > 5:
        # Small counts: fall back to Fisher's exact test.
        return fisher_exact([observed, expected])[1]
    return chisquare(observed, expected)[1]
def chisquare_test(observed0, expected0):
    """Pick chi-square or Fisher's exact test and return its p-value.

    The chi-square test is chosen only when the smallest observed count is
    greater than 5; the p-value is element ``[1]`` of the chosen result.
    """
    observed = np.array(observed0)
    expected = np.array(expected0)
    use_chisquare = min(observed0) > 5
    outcome = (chisquare(observed, expected) if use_chisquare
               else fisher_exact([observed, expected]))
    return outcome[1]
Beispiel #15
0
    def calculate_chi_square(self):
        """Run ``statistics.chisquare`` on ``self.observe``.

        Uses ``self.expected`` as the expected frequencies when it is set,
        otherwise falls back to ``self.calculate_expected_values()``.  The
        result of ``statistics.chisquare`` is returned unchanged.
        """
        expected = self.expected
        if expected is None:
            expected = self.calculate_expected_values()
        return statistics.chisquare(self.observe, f_exp=expected)
Beispiel #16
0
def test_chisquare_ddof_broadcasting():
    """Check that an array-valued ddof broadcasts against the statistic."""
    # Observations have shape (4, 2); with the default axis=0 the statistic
    # has shape (2,).
    observed = np.array([[1, 2, 3, 2], [3, 2, 2, 5]]).T
    expected_stat = [1.0, 2.0]

    # ddof has shape (2, 1), so broadcasting it against the statistic gives
    # p-values of shape (2, 2).
    ddof = np.array([[0], [1]])

    stat, p_all = mstats.chisquare(observed, ddof=ddof)
    assert_array_equal(stat, expected_stat)

    # Each row of the broadcast p-values must equal a scalar-ddof call.
    stat0, p_first = mstats.chisquare(observed, ddof=ddof[0, 0])
    assert_array_equal(stat0, expected_stat)

    stat1, p_second = mstats.chisquare(observed, ddof=ddof[1, 0])
    assert_array_equal(stat1, expected_stat)

    stacked = np.vstack((p_first, p_second))
    assert_array_equal(p_all, stacked)
def test_chisquare_ddof_broadcasting():
    """Array-valued ddof must broadcast with the chi-square statistic."""
    # (4, 2) observations -> statistic of shape (2,) under the default axis=0.
    obs = np.array([[1, 2, 3, 2], [3, 2, 2, 5]]).T
    want = [1.0, 2.0]

    # A (2, 1) ddof broadcasts with the (2,) statistic to (2, 2) p-values.
    ddof = np.array([[0], [1]])

    chi2, p = mstats.chisquare(obs, ddof=ddof)
    assert_array_equal(chi2, want)

    # Repeat with each scalar ddof and collect the p-values row by row.
    rows = []
    for k in (0, 1):
        chi2_k, p_k = mstats.chisquare(obs, ddof=ddof[k, 0])
        assert_array_equal(chi2_k, want)
        rows.append(p_k)

    assert_array_equal(p, np.vstack(tuple(rows)))
def test_chisquare_masked_arrays():
    """Exercise mstats.chisquare with masked observations."""
    # The other tests were taken from the tests for stats.chisquare, so
    # they don't test the function with masked arrays.  Here masked arrays
    # are tested.
    obs = np.array([[8, 8, 16, 32, -1], [-1, -1, 3, 4, 5]]).T
    mask = np.array([[0, 0, 0, 0, 1], [1, 1, 0, 0, 0]]).T
    mobs = ma.masked_array(obs, mask)
    expected_chisq = np.array([24.0, 0.5])

    # chi2.sf replaces stats.chisqprob, which SciPy no longer provides.
    chi2 = stats.distributions.chi2

    chisq, p = mstats.chisquare(mobs)
    assert_array_equal(chisq, expected_chisq)
    # Degrees of freedom use the number of unmasked entries per column.
    assert_array_almost_equal(p, chi2.sf(expected_chisq,
                                         mobs.count(axis=0) - 1))

    chisq, p = mstats.chisquare(mobs.T, axis=1)
    assert_array_equal(chisq, expected_chisq)
    assert_array_almost_equal(p, chi2.sf(expected_chisq,
                                         mobs.T.count(axis=1) - 1))

    # When axis=None, the two values should have type np.float64.
    chisq, p = mstats.chisquare([1, 2, 3], axis=None)
    assert_(isinstance(chisq, np.float64))
    assert_(isinstance(p, np.float64))
    assert_equal(chisq, 1.0)
    assert_almost_equal(p, chi2.sf(1.0, 2))
Beispiel #19
0
 def solve(self, data, targetname, targetpara):
     """Append a uniformity verdict to ``self.conclusion`` for each variable.

     Only ``targetname == 'Test_Uniform_Discrete'`` is handled; any other
     name is a no-op.  For every key in ``targetpara``, the occurrences of
     each distinct value in ``data[key]`` are counted and passed to
     ``stats.chisquare`` (equal expected frequencies).  A p-value above
     0.005 is reported as "follows approximately" a discrete uniform
     distribution, anything else as "does not follow".

     Always returns ``None``.
     """
     if targetname != 'Test_Uniform_Discrete':
         return
     # example test to see if aggregated by some discrete value, the number
     # of sequences follows a uniform distribution
     for para in targetpara:
         # numpy.unique returns the sorted distinct values as a plain
         # ndarray (which has no ``.values`` attribute) together with how
         # often each occurs.
         value_list, counts = numpy.unique(data[para], return_counts=True)
         test_chi2 = stats.chisquare(counts.astype(float))
         if test_chi2[1] > 0.005:
             verdict = ' follows approximately a discrete uniform distribution on: \n       '
         else:
             verdict = ' does not follow a discrete uniform distribution on: \n       '
         self.conclusion.append(para + verdict
                                + str(numpy.asarray(value_list)) + '.')
     return
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  7 14:07:45 2011

@author: Sat Kumar Tomer
@website: www.ambhas.com
@email: [email protected]
"""

# import required modules
from scipy.stats.mstats import chisquare
import numpy as np

# Observed and expected category counts for a goodness-of-fit check.
f_obs = np.array([10, 15, 20, 30])
f_exp = np.array([10, 5, 15, 30])

# NOTE(review): the two arrays sum to 75 and 60; SciPy versions that check
# that the totals agree will reject this input -- confirm the data.
result = chisquare(f_obs, f_exp)
c, p = result

print(c, p)
Beispiel #21
0
import numpy as np
from scipy.stats.mstats import chisquare
from scipy.stats import fisher_exact

# Two count pairs compared once with chisquare and once with Fisher's exact
# test (which treats them as the rows of a 2x2 table).
observed = np.array([2, 188])
expected = np.array([1, 80])
# NOTE(review): the totals differ (190 vs 81); SciPy versions that check
# that observed and expected sums agree will reject this chisquare call.
a = chisquare(observed, expected)
b = fisher_exact([observed, expected])

# Single-argument print(...) gives the same output on Python 2 and also
# runs on Python 3.
print(a)
print(b)
Beispiel #22
0
import numpy as np
from scipy.stats.mstats import chisquare
from scipy.stats import fisher_exact

# Two count pairs compared once with chisquare and once with Fisher's exact
# test (which treats them as the rows of a 2x2 table).
observed = np.array([34, 111])
expected = np.array([71, 281])
# NOTE(review): the totals differ (145 vs 352); SciPy versions that check
# that observed and expected sums agree will reject this chisquare call.
a = chisquare(observed, expected)
b = fisher_exact([observed, expected])

# Single-argument print(...) gives the same output on Python 2 and also
# runs on Python 3.
print(a)
print(b)
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  7 14:07:45 2011

@author: Sat Kumar Tomer
@website: www.ambhas.com
@email: [email protected]
"""

# import required modules
from scipy.stats.mstats import chisquare
import numpy as np

# Goodness-of-fit example: observed counts against expected counts.
f_exp = np.array([10, 5, 15, 30])
f_obs = np.array([10, 15, 20, 30])

# NOTE(review): the observed total is 75 and the expected total is 60;
# confirm the data, since newer SciPy versions require matching totals.
outcome = chisquare(f_obs, f_exp)
c = outcome[0]
p = outcome[1]

print(c, p)
Beispiel #24
0
def test_chisquare(guys):
    """Return the chi-square p-value for the digit counts of ``guys``.

    The counts come from ``count_digs(guys)`` and are tested against equal
    expected frequencies (the default of ``chisquare``).
    """
    _, p_value = chisquare(count_digs(guys))
    return p_value