Python DataFrame 예제들, pyvttbl.DataFrame Python 예제들

예제 #1

0

파일 보기

파일: test_stats.py 프로젝트: flavour/cert

    def test1(self):
        R="""Bivariate Correlations

                         A           B           C
======================================================
A   spearman                 1       0.958      -0.924
    Sig (2-tailed)           .   9.699e-12   2.259e-09
    N                       21          21          21
------------------------------------------------------
B   spearman             0.958           1      -0.890
    Sig (2-tailed)   9.699e-12           .       0.000
    N                       21          21          21
------------------------------------------------------
C   spearman            -0.924      -0.890           1
    Sig (2-tailed)   2.259e-09       0.000           .
    N                       21          21          21

Larzelere and Mulaik Significance Testing

 Pair     i   Correlation       P       alpha/(k-i+1)   Sig.
============================================================
A vs. B   1         0.958   9.699e-12           0.017   **
A vs. C   2         0.924   2.259e-09           0.025   **
B vs. C   3         0.890   6.850e-08           0.050   **   """
        df=DataFrame()
        df['A']=[24,61,59,46,43,44,52,43,58,67,62,57,71,49,54,43,53,57,49,56,33]
        df['B']=[42.93472681237495, 78.87307334936268, 75.37292628918023, 65.49076317291956, 55.55965179772366, 56.777730638998236, 62.19451880792437, 54.73710611356715, 72.10021832823149, 85.94377749485642, 78.2087578930983, 72.01681829338037, 84.27889316830063, 60.20516982367225, 65.6276497088971, 62.36549856901088, 69.18772114281175, 67.00548667483324, 59.042687027269466, 71.99214593063917, 45.00831155783992]
        df['C']=[-53.05540625388731, -96.33996451998567, -92.32465861908086, -70.90536432779966, -55.953777697739255, -74.12814626217357, -75.89188834814621, -64.24093256012688, -89.62208010083313, -87.41075066046812, -80.40932820298143, -77.99906284144805, -95.31607277596169, -61.672429800914486, -85.26088499198657, -63.4402296673869, -74.84950736563589, -85.00433219746624, -71.5901436929124, -76.43243666219388, -48.01082320924727]

        cor=df.correlation(['A','B','C'],coefficient='spearman')
        self.assertEqual(str(cor),R)

예제 #2

0

파일 보기

파일: test_stats_histogram.py 프로젝트: marsja/pyvttbl

    def test02(self):
        """The repr of the WORDS histogram must match the stored text exactly."""
        # Two adjacent literals are joined by the compiler into one string.
        expected = (
            "Histogram([('values', [4.0, 14.0, 17.0, 12.0, 15.0, 10.0, 9.0, 5.0, 6.0, 8.0]), "
            "('bin_edges', [3, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0, 17.0, 19.0, 21.0, 23])], cname='WORDS')"
        )

        words = DataFrame()
        words.read_tbl('data/words~ageXcondition.csv')
        actual = repr(words.histogram('WORDS'))

        self.assertEqual(actual, expected)

예제 #3

0

파일 보기

파일: test_stats_descriptives.py 프로젝트: marsja/pyvttbl

 def test01(self):
     """repr test: a Descriptives rebuilt from its own repr matches the reference values."""
     R = Descriptives([('count', 100.0),
                       ('mean', 11.61),
                       ('mode', 11.0),
                       ('var', 26.947373737373752),
                       ('stdev', 5.191085988246944),
                       ('sem', 0.5191085988246944),
                       ('rms', 12.707084638106414),
                       ('min', 3.0),
                       ('Q1', 7.0),
                       ('median', 11.0),
                       ('Q3', 15.5),
                       ('max', 23.0),
                       ('range', 20.0),
                       ('95ci_lower', 10.592547146303598),
                       ('95ci_upper', 12.6274528536964)],
                      cname='WORDS')

     df = DataFrame()
     df.read_tbl('data/words~ageXcondition.csv')
     # eval() is applied only to the repr this test produced itself, never to
     # external input.
     D = eval(repr(df.descriptives('WORDS')))

     # assertAlmostEqual replaces the deprecated failUnlessAlmostEqual alias,
     # which no longer exists in Python 3.12+.
     for k in D.keys():
         self.assertAlmostEqual(D[k],R[k])

예제 #4

0

파일 보기

파일: test_df__setitem__.py 프로젝트: marsja/pyvttbl

 def test_kn(self):
     """After replacing column X, the keys are CASE, TIME, CONDITION, X."""
     frame = DataFrame()
     frame.read_tbl('data/example.csv')

     # Overwrite X with a constant column of the same length.
     frame['X'] = [23] * len(frame['X'])

     expected_keys = ['CASE', 'TIME', 'CONDITION', 'X']
     self.assertEqual(frame.keys(), expected_keys)

예제 #5

0

파일 보기

파일: test_df_where.py 프로젝트: marsja/pyvttbl

 def test2(self):
     """A tuple-style where clause selects the rows whose ERROR equals 10."""
     R = DataFrame([('SUBJECT', [1, 2]), ('TIMEOFDAY', [u'T1', u'T1']), ('COURSE', [u'C1', u'C2']), ('MODEL', [u'M1', u'M1']), ('ERROR', [10, 10])])

     df=DataFrame()
     df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
     df2 = df.where([('ERROR', '=', 10)])
     # Compare against the reference frame R. The earlier check compared df2
     # with itself, so it could never fail and R went unused.
     self.assertEqual(repr(df2),repr(R))

예제 #6

0

파일 보기

파일: test_df_where.py 프로젝트: marsja/pyvttbl

 def test6(self):
     """Combining an '=' condition on COURSE with an 'in' condition on TIMEOFDAY."""
     expected = DataFrame([
         ('SUBJECT', [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3]),
         ('TIMEOFDAY', [u'T1', u'T1', u'T1', u'T2', u'T2', u'T2',
                        u'T2', u'T2', u'T2',
                        u'T1', u'T1', u'T1', u'T2', u'T2', u'T2']),
         ('COURSE', [u'C1'] * 15),
         ('MODEL', [u'M1', u'M2', u'M3', u'M1', u'M2', u'M3',
                    u'M1', u'M2', u'M3',
                    u'M1', u'M2', u'M3', u'M1', u'M2', u'M3']),
         ('ERROR', [10, 8, 6, 5, 4, 3, 4, 3, 3, 8, 7, 4, 4, 1, 2]),
     ])

     source = DataFrame()
     source.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')

     conditions = [('COURSE', '=', ['C1']),
                   ('TIMEOFDAY', 'in', ["T1", "T2"])]
     selected = source.where(conditions)

     self.assertEqual(repr(selected), repr(expected))

예제 #7

0

파일 보기

파일: test_stats_chisquare1way.py 프로젝트: marsja/pyvttbl

    def test2(self):
        R="""\
Chi-Square: Single Factor

SUMMARY
             1        2        3        4        5    
=====================================================
Observed        7       20       23        9        0 
Expected   11.800   11.800   11.800   11.800   11.800 

CHI-SQUARE TESTS
                     Value    df       P     
============================================
Pearson Chi-Square   30.746    4   3.450e-06 
Likelihood Ratio         --   --          -- 
Observations             59                  

POST-HOC POWER
       Measure                 
==============================
Effect size w            0.722 
Non-centrality lambda   30.746 
Critical Chi-Square      9.488 
Power                    0.998 """

        df = DataFrame()
        df.read_tbl('data/chi_test.csv')
        X=df.chisquare1way('RESULT',{1:11.8 ,2:11.8 ,3:11.8 ,4:11.8 ,5:11.8})
        
        self.assertEqual(str(X),R)

예제 #8

0

파일 보기

파일: test_plotting_box_plot.py 프로젝트: marsja/pyvttbl

    def test1(self):
        """box_plot of WORDS grouped by AGE reports the expected name, title, labels and data."""
        first_group = np.array([
            9, 8, 6, 8, 10, 4, 6, 5, 7, 7,
            7, 9, 6, 6, 6, 11, 6, 3, 8, 7,
            11, 13, 8, 6, 14, 11, 13, 13, 10, 11,
            12, 11, 16, 11, 9, 23, 12, 10, 19, 11,
            10, 19, 14, 5, 10, 11, 14, 15, 11, 11])
        second_group = np.array([
            8, 6, 4, 6, 7, 6, 5, 7, 9, 7,
            10, 7, 8, 10, 4, 7, 10, 6, 7, 7,
            14, 11, 18, 14, 13, 22, 17, 16, 12, 11,
            20, 16, 16, 15, 18, 16, 20, 22, 14, 19,
            21, 19, 17, 15, 22, 16, 22, 22, 18, 21])
        expected = {
            'd': [first_group, second_group],
            'fname': 'output\\box(WORDS~AGE).png',
            'maintitle': 'WORDS by AGE',
            'xlabels': [u'AGE = old', u'AGE = young'],
        }

        frame = DataFrame()
        frame.TESTMODE = True
        frame.read_tbl('data/words~ageXcondition.csv')
        actual = frame.box_plot('WORDS', ['AGE'], output_dir='output')

        # Metadata entries are compared exactly, in this order.
        for key in ('fname', 'maintitle', 'xlabels'):
            self.assertEqual(actual[key], expected[key])

        # Data points are compared element by element after flattening.
        actual_points = np.array(actual['d']).flat
        expected_points = np.array(expected['d']).flat
        for got, want in zip(actual_points, expected_points):
            self.assertAlmostEqual(got, want)

예제 #9

0

파일 보기

파일: test_pt__getitem__.py 프로젝트: marsja/pyvttbl

 def test0(self):
     """Indexing the pivot table at [1, 0] gives the expected value to 5 places."""
     frame = DataFrame()
     frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
     pivot = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

     cell = pivot[1, 0]
     self.assertAlmostEqual(3.22222222222, cell, 5)

예제 #10

0

파일 보기

파일: test_stats.py 프로젝트: flavour/cert

 def test2(self):
     # Between-subjects test: WORDS analysed with AGE and CONDITION as
     # between factors. The visible code makes no assertion on the result.
     words = DataFrame()
     words.read_tbl('words~ageXcondition.csv')

     analysis = Anova()
     analysis.run(words, 'WORDS', bfactors=['AGE', 'CONDITION'])

예제 #11

0

파일 보기

파일: test_plotting_box_plot.py 프로젝트: marsja/pyvttbl

 def test0(self):
     """box_plot of WORDS with no grouping reports the expected name, title, value and data."""
     expected_points = [
         9.0, 8.0, 6.0, 8.0, 10.0, 4.0, 6.0, 5.0, 7.0, 7.0,
         7.0, 9.0, 6.0, 6.0, 6.0, 11.0, 6.0, 3.0, 8.0, 7.0,
         11.0, 13.0, 8.0, 6.0, 14.0, 11.0, 13.0, 13.0, 10.0, 11.0,
         12.0, 11.0, 16.0, 11.0, 9.0, 23.0, 12.0, 10.0, 19.0, 11.0,
         10.0, 19.0, 14.0, 5.0, 10.0, 11.0, 14.0, 15.0, 11.0, 11.0,
         8.0, 6.0, 4.0, 6.0, 7.0, 6.0, 5.0, 7.0, 9.0, 7.0,
         10.0, 7.0, 8.0, 10.0, 4.0, 7.0, 10.0, 6.0, 7.0, 7.0,
         14.0, 11.0, 18.0, 14.0, 13.0, 22.0, 17.0, 16.0, 12.0, 11.0,
         20.0, 16.0, 16.0, 15.0, 18.0, 16.0, 20.0, 22.0, 14.0, 19.0,
         21.0, 19.0, 17.0, 15.0, 22.0, 16.0, 22.0, 22.0, 18.0, 21.0]
     expected = {'d': expected_points,
                 'fname': 'output\\box(WORDS).png',
                 'maintitle': 'WORDS',
                 'val': 'WORDS'}

     frame = DataFrame()
     frame.TESTMODE = True
     frame.read_tbl('data/words~ageXcondition.csv')
     actual = frame.box_plot('WORDS', output_dir='output')

     # Metadata entries are compared exactly, in this order.
     for key in ('fname', 'maintitle', 'val'):
         self.assertEqual(actual[key], expected[key])

     # Data points are compared element by element after flattening.
     for got, want in zip(np.array(actual['d']).flat,
                          np.array(expected['d']).flat):
         self.assertAlmostEqual(got, want)

예제 #12

0

파일 보기

파일: test_stats_ttest_1sample.py 프로젝트: marsja/pyvttbl

    def test1(self):
        R="""\
t-Test: One Sample for means

                          SUPPRESSION 
=====================================
Sample Mean                    19.541 
Hypothesized Pop. Mean             17 
Variance                      228.326 
Observations                      384 
df                                383 
t Stat                          3.295 
alpha                           0.050 
P(T<=t) one-tail            5.384e-04 
t Critical one-tail             1.966 
P(T<=t) two-tail                0.001 
t Critical two-tail             1.649 
P(T<=t) two-tail                0.001 
Effect size d                   0.168 
delta                           3.295 
Observed power one-tail         0.950 
Observed power two-tail         0.908 """
        
        df = DataFrame()
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        D=df.ttest('SUPPRESSION', pop_mean=17.)
        self.assertEqual(str(D),R)

예제 #13

0

파일 보기

파일: test_pt__repr__.py 프로젝트: marsja/pyvttbl

    def test2(self):
        """The repr of a filtered pivot table survives an eval round-trip unchanged."""
        frame = DataFrame()
        frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = frame.pivot('ERROR',
                         ['MODEL', 'TIMEOFDAY'],
                         ['COURSE'],
                         where=['SUBJECT != 1'])

        # Rebuild an object from the repr text, then compare both reprs.
        rebuilt = eval(repr(pt))
        self.assertEqual(repr(rebuilt), repr(pt))

예제 #14

0

파일 보기

파일: test_stats_chisquare1way.py 프로젝트: marsja/pyvttbl

    def test1(self):
        R="""\
Chi-Square: Single Factor

SUMMARY
             1        2        3        4    
============================================
Observed        7       20       23        9 
Expected   14.750   14.750   14.750   14.750 

CHI-SQUARE TESTS
                     Value    df     P   
========================================
Pearson Chi-Square   12.797    3   0.005 
Likelihood Ratio     13.288    3   0.004 
Observations             59              

POST-HOC POWER
       Measure                 
==============================
Effect size w            0.466 
Non-centrality lambda   12.797 
Critical Chi-Square      7.815 
Power                    0.865 """

        df = DataFrame()
        df.read_tbl('data/chi_test.csv')
        X=df.chisquare1way('RESULT')
        self.assertEqual(str(X),R)

예제 #15

0

파일 보기

파일: test_stats_descriptives.py 프로젝트: marsja/pyvttbl

    def test11(self):
        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')

        D = str(df.descriptives('ERROR'))
        R = """\
Descriptive Statistics
  ERROR
==========================
 count        48.000 
 mean          3.896 
 mode          3.000 
 var           5.797 
 stdev         2.408 
 sem           0.348 
 rms           4.567 
 min           0.000 
 Q1            2.000 
 median        3.000 
 Q3            5.000 
 max          10.000 
 range        10.000 
 95ci_lower    3.215 
 95ci_upper    4.577 """
        self.assertEqual(D, R)

예제 #16

0

파일 보기

파일: test_stats_descriptives.py 프로젝트: marsja/pyvttbl

    def test1(self):
        """Each statistic in the ERROR descriptives matches the reference value."""
        R = Descriptives([('count', 48.0),
                          ('mean', 3.8958333333333335),
                          ('mode', 3.0),
                          ('var', 5.797429078014184),
                          ('stdev', 2.4077850979716158),
                          ('sem', 0.34753384361617046),
                          ('rms', 4.566636252940086),
                          ('min', 0.0),
                          ('Q1', 2.0),
                          ('median', 3.0),
                          ('Q3', 5.0),
                          ('max', 10.0),
                          ('range', 10.0),
                          ('95ci_lower', 3.2146669998456394),
                          ('95ci_upper', 4.5769996668210275)],
                         cname='ERROR')

        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')

        D=df.descriptives('ERROR')

        # assertAlmostEqual replaces the deprecated failUnlessAlmostEqual
        # alias, which no longer exists in Python 3.12+.
        for k in D.keys():
            self.assertAlmostEqual(D[k],R[k])

예제 #17

0

파일 보기

파일: views.py 프로젝트: alabarga/calculadoraCO2

def export_csv_pivot(request, entidad=1, ano=None):
    """Return the pivoted consumption records of one entity and year as a download.

    Args:
        request: the incoming request object (not read by this view).
        entidad: primary key of the entity whose ``Consumo`` rows are exported.
        ano: year to export; when omitted, the current year is used, resolved
            at call time.

    Returns:
        An ``HttpResponse`` with content type ``text/csv`` and an attachment
        disposition, whose body is the pivot table written via
        ``response.write``.
    """
    from collections import namedtuple

    # Resolve the default per request. A default computed in the signature is
    # evaluated once at import time and goes stale in a long-running process.
    if ano is None:
        ano = str(date.today().year)

    consumos = Consumo.objects.filter(entidad__pk=entidad, ano=ano)

    LineaDetalle = namedtuple('LineaDetalle',[u'Año', "Mes", 'Local_o_Vehiculo', "Consumo", "Valor"])

    df = DataFrame()

    for c in consumos:

        # NOTE(review): 16 is presumably the ContentType id of Local in the
        # deployed database -- confirm; anything else is treated as Vehiculo.
        if c.content_type.id == 16:
            denominacion = Local.objects.get(pk=c.object_id).denominacion
        else:
            denominacion = Vehiculo.objects.get(pk=c.object_id).denominacion

        df.insert(LineaDetalle(c.ano, c.mes, denominacion.encode("utf-8"), c.medida.denominacion.encode("utf-8"), c.valor)._asdict())

    pt = df.pivot("Valor", ['Local_o_Vehiculo','Consumo'], ['Mes'])

    # get the response object, this can be used as a stream.
    # ``content_type`` replaces ``mimetype``, which Django removed in 1.7.
    response = HttpResponse(content_type='text/csv')
    # force download.
    response['Content-Disposition'] = 'attachment;filename=export.csv'

    response.write(pt)

    return response

예제 #18

0

파일 보기

파일: test_plotting_box_plot.py 프로젝트: marsja/pyvttbl

    def test3(self):
        """box_plot on a DataFrame with no data raises with the expected message."""
        empty = DataFrame()

        with self.assertRaises(Exception) as caught:
            empty.box_plot('a', output_dir='output')

        message = str(caught.exception)
        self.assertEqual(message, 'Table must have data to print data')

예제 #19

0

파일 보기

파일: test_df_insert.py 프로젝트: marsja/pyvttbl

    def test1(self):
        """Inserting a plain list instead of a mapping raises with the expected message."""
        frame = DataFrame()
        bad_row = [1, 2, 3, 4]

        with self.assertRaises(Exception) as caught:
            frame.insert(bad_row)

        message = str(caught.exception)
        self.assertEqual(message, 'row must be mappable type')

예제 #20

0

파일 보기

파일: test_df__setitem__.py 프로젝트: marsja/pyvttbl

 def test12(self):
     """Reassigning DUM from numbers to strings is accepted and recorded as 'text'."""
     frame = DataFrame()

     # First assignment: 48 integers. Must be accepted.
     frame['DUM'] = range(48)
     self.assertEqual(frame.keys(), ['DUM'])

     # Second assignment: 48 strings over the same key. Must also be accepted.
     frame['DUM'] = ['A'] * 48
     self.assertEqual(frame.keys(), ['DUM'])
     self.assertEqual(frame._sqltypesdict['DUM'], 'text')

예제 #21

0

파일 보기

파일: test_df_are_col_lengths_equal.py 프로젝트: marsja/pyvttbl

 def test3(self):
     """unequal: one column shorter than the others is reported as not equal."""
     frame = DataFrame()
     for key in (1, 2, 3):
         frame[key] = range(10)
     # The last column is one element short.
     frame[4] = range(9)

     lengths_match = frame._are_col_lengths_equal()
     self.assertFalse(lengths_match)

예제 #22

0

파일 보기

파일: test_stats.py 프로젝트: flavour/cert

 def test0(self):
     # Within-subjects test: ERROR analysed over TIMEOFDAY, COURSE and MODEL,
     # then written out as HTML. (A transform='windsor05' argument was left
     # disabled by the original author.)
     source_name = 'error~subjectXtimeofdayXcourseXmodel.csv'
     frame = DataFrame()
     frame.read_tbl(source_name)

     analysis = Anova()
     analysis.run(frame, 'ERROR', wfactors=['TIMEOFDAY', 'COURSE', 'MODEL'])

     # Drop the 4-character '.csv' suffix and append the results suffix.
     stem = source_name[:-4]
     analysis.output2html(stem + 'RESULTS.htm')

예제 #23

0

파일 보기

파일: test_plotting_scatter_plot.py 프로젝트: marsja/pyvttbl

    def test01(self):
        """scatter_plot without a trend argument reports 'trend' as None."""
        brain = DataFrame()
        brain.TESTMODE = True
        brain.read_tbl('data/iqbrainsize.txt', delimiter='\t')

        result = brain.scatter_plot('TOTVOL', 'FIQ', output_dir='output')
        trend = result['trend']

        self.assertEqual(None, trend)

예제 #24

0

파일 보기

파일: test_plotting_histogram_plot.py 프로젝트: marsja/pyvttbl

    def test1(self):
        """A cumulative histogram plot of WORDS reports the expected output file name."""
        words = DataFrame()
        words.TESTMODE = True
        words.read_tbl('data/words~ageXcondition.csv')

        result = words.histogram_plot('WORDS', cumulative=True,
                                      output_dir='output')
        expected_name = 'output\\hist(WORDS,cumulative=True).png'

        self.assertEqual(result['fname'], expected_name)

예제 #25

0

파일 보기

파일: test_df_are_col_lengths_equal.py 프로젝트: marsja/pyvttbl

 def test2(self):
     """equal non-zero: four columns of ten items each are reported as equal."""
     frame = DataFrame()
     for key in (1, 2, 3, 4):
         frame[key] = range(10)

     lengths_match = frame._are_col_lengths_equal()
     self.assertTrue(lengths_match)

예제 #26

0

파일 보기

파일: test_stats.py 프로젝트: flavour/cert

 def test3(self):
     """Run a mixed ANOVA on SUPPRESSION: CYCLE and PHASE within, GROUP between.

     NOTE(review): this snippet looks truncated (the trailing comment on the
     last line is cut off mid-word); no assertion is visible here.
     """
     ## Mixed Between/Within test
     df=DataFrame()
     fname='suppression~subjectXgroupXcycleXphase.csv'
     df.read_tbl(fname)
     # Rescale every SUPPRESSION value by 0.01 before the analysis.
     df['SUPPRESSION']=[.01*x for x in df['SUPPRESSION']]
     aov=Anova()
     aov.run(df,'SUPPRESSION',wfactors=['CYCLE','PHASE'],bfactors=['GROUP'])#,transform='win

예제 #27

0

파일 보기

파일: test_stats_marginals.py 프로젝트: marsja/pyvttbl

    def test05(self):
        """Compute WORDS marginals over AGE and CONDITION, restricted to AGE == "old".

        NOTE(review): in the visible code the reference text R and the result D
        are never compared; the snippet may be truncated -- confirm against the
        full test file.
        """
        # Reference repr text for the Marginals object.
        R = """Marginals([('factorials', OrderedDict([('AGE', [u'old', u'old', u'old', u'old', u'old']), ('CONDITION', [u'adjective', u'counting', u'imagery', u'intention', u'rhyming'])])), ('dmu', [11.0, 7.0, 13.4, 12.0, 6.9000000000000004]), ('dN', [10, 10, 10, 10, 10]), ('dsem', [0.78881063774661542, 0.57735026918962573, 1.4236104336041748, 1.1832159566199232, 0.67412494720522276]), ('dlower', [9.4539311500166345, 5.868393472388334, 10.609723550135818, 9.6808967250249509, 5.578715103477764]), ('dupper', [12.546068849983365, 8.131606527611666, 16.190276449864182, 14.319103274975049, 8.2212848965222367])], val='WORDS', factors=['AGE', 'CONDITION'], where='AGE == "old"')"""

        df=DataFrame()
        df.read_tbl('data/words~ageXcondition.csv')
        D = df.marginals('WORDS',
                              factors=['AGE','CONDITION'],
                              where='AGE == "old"')

예제 #28

0

파일 보기

파일: test_df_validate.py 프로젝트: marsja/pyvttbl

    def test3(self):
        """validate on a DataFrame with no data raises with the expected message."""
        empty = DataFrame()
        criteria = {'GROUP': lambda x: x in ['AA', 'AB', 'LAB']}

        with self.assertRaises(Exception) as caught:
            empty.validate(criteria)

        message = str(caught.exception)
        self.assertEqual(message, 'table must have data to validate data')

예제 #29

0

파일 보기

파일: test_df_sort.py 프로젝트: marsja/pyvttbl

    def test3(self):
        """sort on a DataFrame with no data raises with the expected message."""
        empty = DataFrame()

        with self.assertRaises(Exception) as caught:
            empty.sort()

        message = str(caught.exception)
        self.assertEqual(message, 'Table must have data to sort data')

예제 #30

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: marsja/pyvttbl

    def test3(self):
        """interaction_plot with separate x and y subplots and a 'not in' where filter.

        Plots SUPPRESSION over CYCLE with one line per AGE, one subplot column
        per PHASE and one subplot row per GROUP, excluding GROUP 'LAB', and
        compares the reported plot description with the stored reference R.
        The 'xmaxs' and 'xmins' entries of R are not checked by this test.
        """
        R = {'aggregate': 'ci',
             'clevels': ['I', 'II'],
             'fname': 'output\\whereGROUPnotLAB.png',
             'maintitle': 'SUPPRESSION by CYCLE * AGE * PHASE * GROUP',
             'numcols': 2,
             'numrows': 2,
             'rlevels': ['AA', 'AB'],
             'subplot_titles': ['GROUP = AA, PHASE = AA',
                                'GROUP = AA, PHASE = AA',
                                'GROUP = AB, PHASE = AB',
                                'GROUP = AB, PHASE = AB'],
             'xmaxs': [4.1500000000000004,
                       4.1500000000000004,
                       4.1500000000000004,
                       4.1500000000000004],
             'xmins': [0.84999999999999998,
                       0.84999999999999998,
                       0.84999999999999998,
                       0.84999999999999998],
             'y': [[[ 17.75 ,  22.375,  23.125,  20.25 ],
                    [  8.675,  10.225,  10.5  ,   9.925]],
                   [[ 20.875,  28.125,  20.75 ,  24.25 ],
                    [  8.3  ,  10.25 ,   9.525,  11.1  ]],
                   [[ 12.625,  23.5  ,  20.   ,  15.625],
                    [  5.525,   8.825,   9.125,   7.75 ]],
                   [[ 22.75 ,  41.125,  46.125,  51.75 ],
                    [  8.675,  13.1  ,  14.475,  12.85 ]]],
             'ymax': 64.8719707118471,
             'ymin': 0.0}

        # separate y plots and separate x plots
        df=DataFrame()
        df.TESTMODE = True
        # A forward slash matches the other tests and avoids the invalid
        # '\s' escape sequence the backslash form produced.
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')

        D = df.interaction_plot('SUPPRESSION','CYCLE',
                                seplines='AGE',
                                sepxplots='PHASE',
                                sepyplots='GROUP',yerr='ci',
                                where=[('GROUP','not in',['LAB'])],
                                fname='whereGROUPnotLAB.png',
                                output_dir='output')


        self.assertEqual(D['aggregate'],      R['aggregate'])
        self.assertEqual(D['clevels'],        R['clevels'])
        self.assertEqual(D['rlevels'],        R['rlevels'])
        self.assertEqual(D['numcols'],        R['numcols'])
        self.assertEqual(D['numrows'],        R['numrows'])
        self.assertEqual(D['fname'],          R['fname'])
        self.assertEqual(D['maintitle'],      R['maintitle'])
        self.assertEqual(D['subplot_titles'], R['subplot_titles'])
        self.assertAlmostEqual(D['ymin'],     R['ymin'])
        self.assertAlmostEqual(D['ymax'],     R['ymax'])

        # Plotted y values are compared element by element after flattening.
        for d,r in zip(np.array(D['y']).flat,np.array(R['y']).flat):
            self.assertAlmostEqual(d,r)

예제 #31

0

파일 보기

    def test1(self):
        R = """\
CYCLE   PHASE   GROUP=AA,   GROUP=AA,   GROUP=AB,   GROUP=AB,   GROUP=LAB,   GROUP=LAB, 
                 AGE=old    AGE=young    AGE=old    AGE=young    AGE=old     AGE=young  
=======================================================================================
    1   I          17.750       8.675      12.625       5.525       21.625        7.825 
    1   II         20.875       8.300      22.750       8.675       36.250       13.750 
    2   I          22.375      10.225      23.500       8.825       21.375        9.900 
    2   II         28.125      10.250      41.125      13.100       46.875       14.375 
    3   I          23.125      10.500      20.000       9.125       23.750        9.500 
    3   II         20.750       9.525      46.125      14.475       50.375       15.575 
    4   I          20.250       9.925      15.625       7.750       26.375        9.650 
    4   II         24.250      11.100      51.750      12.850       46.500       14.425 """
        df = DataFrame()
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        pt = df.pivot('SUPPRESSION',
                      rows=['CYCLE', 'PHASE'],
                      cols=['GROUP', 'AGE'])
        df2 = pt.to_dataframe()

        self.assertEqual(str(df2), R)

예제 #32

0

파일 보기

파일: test_plotting_scatter_plot.py 프로젝트: yk/pyvttbl

    def test01(self):
        """scatter_plot of TOTVOL against FIQ reports 'trend' as None."""
        table = DataFrame()
        table.TESTMODE = True
        table.read_tbl('data/iqbrainsize.txt', delimiter='\t')

        plot_info = table.scatter_plot(
            'TOTVOL',
            'FIQ',
            output_dir='output',
        )

        self.assertEqual(None, plot_info['trend'])

예제 #33

0

파일 보기

파일: test_plotting_box_plot.py 프로젝트: yk/pyvttbl

    def test1(self):
        """box_plot of WORDS grouped by AGE: file name, title, labels and data points."""
        first_group = np.array([
            9, 8, 6, 8, 10, 4, 6, 5, 7, 7, 7, 9, 6, 6, 6, 11, 6, 3, 8,
            7, 11, 13, 8, 6, 14, 11, 13, 13, 10, 11, 12, 11, 16, 11, 9,
            23, 12, 10, 19, 11, 10, 19, 14, 5, 10, 11, 14, 15, 11, 11
        ])
        second_group = np.array([
            8, 6, 4, 6, 7, 6, 5, 7, 9, 7, 10, 7, 8, 10, 4, 7, 10, 6, 7,
            7, 14, 11, 18, 14, 13, 22, 17, 16, 12, 11, 20, 16, 16, 15,
            18, 16, 20, 22, 14, 19, 21, 19, 17, 15, 22, 16, 22, 22, 18,
            21
        ])
        expected = {
            'd': [first_group, second_group],
            'fname': 'output\\box(WORDS~AGE).png',
            'maintitle': 'WORDS by AGE',
            'xlabels': ['AGE = old', 'AGE = young'],
        }

        frame = DataFrame()
        frame.TESTMODE = True
        frame.read_tbl('data/words~ageXcondition.csv')
        actual = frame.box_plot('WORDS', ['AGE'], output_dir='output')

        # Exact comparisons for the descriptive entries, in this order.
        for key in ('fname', 'maintitle', 'xlabels'):
            self.assertEqual(actual[key], expected[key])

        # Approximate, element-wise comparison of the flattened data.
        got_values = np.array(actual['d']).flat
        want_values = np.array(expected['d']).flat
        for got, want in zip(got_values, want_values):
            self.assertAlmostEqual(got, want)

예제 #34

0

파일 보기

파일: test_plotting_box_plot.py 프로젝트: yk/pyvttbl

    def test0(self):
        """box_plot of WORDS without grouping: file name, title, value name and data points."""
        expected_points = [
            9.0, 8.0, 6.0, 8.0, 10.0, 4.0, 6.0, 5.0, 7.0, 7.0,
            7.0, 9.0, 6.0, 6.0, 6.0, 11.0, 6.0, 3.0, 8.0, 7.0,
            11.0, 13.0, 8.0, 6.0, 14.0, 11.0, 13.0, 13.0, 10.0, 11.0,
            12.0, 11.0, 16.0, 11.0, 9.0, 23.0, 12.0, 10.0, 19.0, 11.0,
            10.0, 19.0, 14.0, 5.0, 10.0, 11.0, 14.0, 15.0, 11.0, 11.0,
            8.0, 6.0, 4.0, 6.0, 7.0, 6.0, 5.0, 7.0, 9.0, 7.0,
            10.0, 7.0, 8.0, 10.0, 4.0, 7.0, 10.0, 6.0, 7.0, 7.0,
            14.0, 11.0, 18.0, 14.0, 13.0, 22.0, 17.0, 16.0, 12.0, 11.0,
            20.0, 16.0, 16.0, 15.0, 18.0, 16.0, 20.0, 22.0, 14.0, 19.0,
            21.0, 19.0, 17.0, 15.0, 22.0, 16.0, 22.0, 22.0, 18.0, 21.0,
        ]
        expected = {
            'd': expected_points,
            'fname': 'output\\box(WORDS).png',
            'maintitle': 'WORDS',
            'val': 'WORDS',
        }

        frame = DataFrame()
        frame.TESTMODE = True
        frame.read_tbl('data/words~ageXcondition.csv')
        actual = frame.box_plot('WORDS', output_dir='output')

        # Exact comparisons for the descriptive entries, in this order.
        for key in ('fname', 'maintitle', 'val'):
            self.assertEqual(actual[key], expected[key])

        # Approximate, element-wise comparison of the flattened data.
        got_values = np.array(actual['d']).flat
        want_values = np.array(expected['d']).flat
        for got, want in zip(got_values, want_values):
            self.assertAlmostEqual(got, want)

예제 #35

0

파일 보기

파일: test_df_sort.py 프로젝트: yk/pyvttbl

    def test2(self):
        """Sorting by B and then A yields the expected order in both columns."""
        expected = {
            'A': [-8.0, -7.0, -3.0, -2.0, -1.0, 1.0, 2.0, 3.0, 4.0, 9.0,
                  -10.0, -9.0, -6.0, -5.0, -4.0, 0.0, 5.0, 6.0, 7.0, 8.0],
            'B': [1.0] * 10 + [2.0] * 10,
        }

        a_values = [4, 8, 1, 5, -7, -5, 9, 7, -8, -10,
                    -1, -4, 3, 0., -2, 6, 2, -9, -3, -6]
        # B alternates 1, 2 across the twenty rows.
        b_values = [1, 2] * 10

        frame = DataFrame()
        for a_item, b_item in zip(a_values, b_values):
            frame.insert({'A': a_item, 'B': b_item})

        frame.sort(['B', 'A'])

        # Column A is checked first, then column B.
        for column in ('A', 'B'):
            for got, want in zip(frame[column], expected[column]):
                self.assertAlmostEqual(got, want)

예제 #36

0

파일 보기

class Test_writeTable(unittest.TestCase):
    """Tests for ``DataFrame.write`` using the suppression data set.

    Each test calls ``write`` without a file name and then compares the
    source CSV with the file the test expects to appear in the working
    directory.
    """

    def setUp(self):
        # Every test starts from a freshly loaded table.
        self.df = DataFrame()
        self.df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')

    def _remove_output(self, fname):
        """Delete ``fname`` if it exists; used as a cleanup callback."""
        if os.path.exists(fname):
            os.remove(fname)

    def test0(self):
        """Write the whole table and compare it with the source file."""
        d = 'data/suppression~subjectXgroupXageXcycleXphase.csv'
        r = 'subjectXsexXageXgroupXcycleXphaseXsuppressionXranddata.csv'

        # Registered up front so the generated file is removed even when
        # the assertion below fails.
        self.addCleanup(self._remove_output, './' + r)

        self.df.write()
        self.assertTrue(fcmp(d, r))

    def test1(self):
        """Write with a ``where`` exclusion and compare with the source file."""
        # with exclusion
        d = 'data/suppression~subjectXgroupXageXcycleXphase.csv'
        r = 'subjectXsexXageXgroupXcycleXphaseXsuppressionXranddata.csv'

        # Registered up front so the generated file is removed even when
        # the assertion below fails.
        self.addCleanup(self._remove_output, './' + r)

        self.df.write(where=[('AGE', 'not in', ['young'])])
        # NOTE(review): the filtered output is still expected to match the
        # full source file -- confirm this is intended.
        self.assertTrue(fcmp(d, r))

예제 #37

0

파일 보기

    def test06(self):
        R = """\
y 1   y 2   y 3 
===============
  1     5     9 
  2     6    -- 
  3     7    11 
  4     8    12 """

        # labels have spaces
        with open('test.csv', 'wb') as f:
            f.write("""
y 1,y 2,y 3
1,5,9
2,6,
3,7,11
4,8,12""")

        self.df = DataFrame()
        self.df.read_tbl('test.csv', skip=1, labels=True)

        self.assertAlmostEqual(str(self.df), R)

예제 #38

0

파일 보기

    def test05(self):
        R = """\
x   y   z  
==========
1   5    9 
2   6   -- 
3   7   11 
4   8   12 """

        # cell has empty string, comma after 6
        with open('test.csv', 'wb') as f:
            f.write("""
x,y,z
1,5,9
2,6,
3,7,11
4,8,12""")

        self.df = DataFrame()
        self.df.read_tbl('test.csv', skip=1, labels=True)

        self.assertAlmostEqual(str(self.df), R)

예제 #39

0

파일 보기

    def test4(self):
        """validate() raises when handed a bare callable as the criteria."""
        table = DataFrame()
        table.insert([('GROUP', 'AA'), ('VAL', 1)])

        allowed = ['AA', 'AB', 'LAB']
        with self.assertRaises(Exception) as caught:
            table.validate(lambda x: x in allowed)

        message = str(caught.exception)
        self.assertEqual(message, 'criteria must be mappable type')

예제 #40

0

파일 보기

 def test1(self):
     """where(['ERROR = 10']) yields a frame whose repr matches the expected one."""
     expected = DataFrame([
         ('SUBJECT', [1, 2]),
         ('TIMEOFDAY', ['T1', 'T1']),
         ('COURSE', ['C1', 'C2']),
         ('MODEL', ['M1', 'M1']),
         ('ERROR', [10, 10]),
     ])

     table = DataFrame()
     table.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
     filtered = table.where(['ERROR = 10'])

     self.assertEqual(repr(filtered), repr(expected))

예제 #41

0

파일 보기

파일: test_plotting_histogram_plot.py 프로젝트: taotaohe/pyvttbl

    def test1(self):
        """Cumulative histogram of WORDS reports the expected output file name."""
        frame = DataFrame()
        frame.TESTMODE = True
        frame.read_tbl('data/words~ageXcondition.csv')

        result = frame.histogram_plot('WORDS',
                                      output_dir='output',
                                      cumulative=True)

        expected_fname = 'output\\hist(WORDS,cumulative=True).png'
        self.assertEqual(result['fname'], expected_fname)

예제 #42

0

파일 보기

    def test3(self):
        R = """c   b{L@^hsa aj}   a(1%32@) 
===========================
1   a                    34 
2   b                    34 
3   c                    42 
4   d                    34 
5   e                    45 
6   f                    34 """
        df=DataFrame()
        df.PRINTQUERIES = True
        df.insert({'a(1%32@)':34,'b{L@^hsa aj}':'a','c':1})
        df.insert({'a(1%32@)':34,'b{L@^hsa aj}':'b','c':2})
        df.insert({'a(1%32@)':42,'b{L@^hsa aj}':'c','c':3})
        df.insert({'a(1%32@)':34,'b{L@^hsa aj}':'d','c':4})
        df.insert({'a(1%32@)':45,'b{L@^hsa aj}':'e','c':5})
        df.insert({'a(1%32@)':34,'b{L@^hsa aj}':'f','c':6})

        self.assertEqual(R, str(df))

예제 #43

0

파일 보기

    def test2(self):
        R="""Anova: Single Factor on SUPPRESSION

SUMMARY
Groups   Count     Sum      Average   Variance
==============================================
AA         128       2048        16    148.792
AB         128   2510.600    19.614    250.326
LAB        128   2945.000    23.008    264.699

ANOVA
Source of       SS       df       MS        F      P-value
Variation
===========================================================
Treatments    3144.039     2   1572.020   7.104   9.348e-04
Error        84304.687   381    221.272
===========================================================
Total        87448.726   383                                """

        df = DataFrame()
        df.read_tbl('suppression~subjectXgroupXageXcycleXphase.csv')
        aov=df.anova1way('SUPPRESSION','GROUP')
        self.assertEqual(str(aov),R)

예제 #44

0

파일 보기

    def test3(self):
        """insert() rejects a row whose keys differ from the table's columns."""
        table = DataFrame()
        table.insert({'A': 1, 'B': 2})

        row_with_extra_key = {'A': 1, 'B': 2, 'C': 3}
        with self.assertRaises(Exception) as caught:
            table.insert(row_with_extra_key)

        message = str(caught.exception)
        self.assertEqual(message, 'row must have the same keys as the table')

예제 #45

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test2(self):
        """interaction_plot with seplines, sepyplots and 'ci' error bars.

        With TESTMODE set, the mapping returned by ``interaction_plot`` is
        compared key by key against the reference values below.
        """
        R = {
            'aggregate': 'ci',
            'clevels': [1],
            'fname':
            'output\\interaction_plot(SUPPRESSION~CYCLE_X_AGE_X_PHASE,yerr=95% ci).png',
            'maintitle': 'SUPPRESSION by CYCLE * AGE * PHASE',
            'numcols': 1,
            'numrows': 2,
            'rlevels': ['I', 'II'],
            'subplot_titles': ['I', 'II'],
            # xmaxs / xmins are recorded here but not asserted below.
            'xmaxs': [4.1749999999999998, 4.1749999999999998],
            'xmins': [0.32499999999999996, 0.32499999999999996],
            'y': [[[17.33333333, 22.41666667, 22.29166667, 20.75],
                   [7.34166667, 9.65, 9.70833333, 9.10833333]],
                  [[26.625, 38.70833333, 39.08333333, 40.83333333],
                   [10.24166667, 12.575, 13.19166667, 12.79166667]]],
            'yerr': [[1.81325589, 1.44901936, 1.60883063, 1.57118871],
                     [2.49411239, 1.34873573, 1.95209851, 1.35412572]],
            'ymax': 64.8719707118471,
            'ymin': 0.0,
        }

        # generate yerr
        df = DataFrame()
        df.TESTMODE = True
        # Forward slash: 'data\s...' relied on the invalid escape '\s' and
        # only resolved on Windows; the sibling tests already use 'data/'.
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')

        D = df.interaction_plot('SUPPRESSION',
                                'CYCLE',
                                seplines='AGE',
                                sepyplots='PHASE',
                                yerr='ci',
                                output_dir='output')

        # Exact-match fields.
        for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                    'fname', 'maintitle', 'subplot_titles'):
            self.assertEqual(D[key], R[key])

        # Scalar floats.
        for key in ('ymin', 'ymax'):
            self.assertAlmostEqual(D[key], R[key])

        # Nested float data, compared element-wise after flattening.
        for key in ('y', 'yerr'):
            for d, r in zip(np.array(D[key]).flat, np.array(R[key]).flat):
                self.assertAlmostEqual(d, r)

예제 #46

0

파일 보기

파일: test_stats.py 프로젝트: smeissner/ifrc

    def test0(self):
        R = """Chi-Square: two Factor

SUMMARY
         Guilty     NotGuilty   Total
=====================================
High          105          76     181
        (130.441)    (50.559)
Low           153          24     177
        (127.559)    (49.441)
=====================================
Total         258         100     358

SYMMETRIC MEASURES
                          Value    Approx.
                                    Sig.
===========================================
Cramer's V                0.317   8.686e-10
Contingency Coefficient   0.302   5.510e-09
N of Valid Cases            358

CHI-SQUARE TESTS
                        Value    df       P
===============================================
Pearson Chi-Square      35.930    1   2.053e-09
Continuity Correction   34.532    1   4.201e-09
Likelihood Ratio        37.351    1           0
N of Valid Cases           358                  """
        df = DataFrame()
        df['FAULTS'] = list(Counter(Low=177, High=181).elements())
        df['FAULTS'].reverse()
        df['VERDICT'] = list(Counter(Guilty=153, NotGuilty=24).elements())
        df['VERDICT'].extend(list(
            Counter(Guilty=105, NotGuilty=76).elements()))

        x2 = df.chisquare2way('FAULTS', 'VERDICT')
        self.assertEqual(str(x2), R)

예제 #47

0

파일 보기

파일: test_stats_descriptives.py 프로젝트: yk/pyvttbl

    def test02(self):
        df = DataFrame()
        df.read_tbl('data/words~ageXcondition.csv')
        D = str(df.descriptives('WORDS'))
        R = """\
Descriptive Statistics
  WORDS
==========================
 count        100.000 
 mean          11.610 
 mode          11.000 
 var           26.947 
 stdev          5.191 
 sem            0.519 
 rms           12.707 
 min            3.000 
 Q1             7.000 
 median        11.000 
 Q3            15.500 
 max           23.000 
 range         20.000 
 95ci_lower    10.593 
 95ci_upper    12.627 """
        self.assertEqual(D, R)

예제 #48

0

파일 보기

    def test5(self):

        R = [
            """\
avg(ERROR)
COURSE   TIMEOFDAY=T1   TIMEOFDAY=T2 
====================================
C1              7.167          3.222 """, """\
avg(ERROR)
COURSE   TIMEOFDAY=T1   TIMEOFDAY=T2 
====================================
C2              6.500          2.889 """, """\
avg(ERROR)
COURSE   TIMEOFDAY=T1   TIMEOFDAY=T2 
====================================
C3                  4          1.556 """
        ]

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        for r, L in zip(R, pt.transpose()):
            self.assertAlmostEqual(r, str(L))

예제 #49

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test1(self):
        """interaction_plot with seplines, sepxplots and a fixed yerr of 1."""
        expected = {
            'aggregate': None,
            'clevels': ['M1', 'M2', 'M3'],
            'fname':
            'output\\interaction_plot(ERROR~TIMEOFDAY_X_COURSE_X_MODEL,yerr=1.0).png',
            'maintitle': 'ERROR by TIMEOFDAY * COURSE * MODEL',
            'numcols': 3,
            'numrows': 1,
            'rlevels': [1],
            'subplot_titles': ['M1', 'M2', 'M3'],
            'xmaxs': [1.5, 1.5, 1.5],
            'xmins': [-0.5, -0.5, -0.5],
            'y': [[[9., 4.33333333], [8.66666667, 3.66666667],
                   [4.66666667, 1.66666667]],
                  [[7.5, 2.66666667], [6., 2.66666667], [5., 1.66666667]],
                  [[5., 2.66666667], [3.5, 2.33333333],
                   [2.33333333, 1.33333333]]],
            'yerr': [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],
            'ymax': 11.119188627248182,
            'ymin': 0.0,
        }

        # specify yerr
        table = DataFrame()
        table.TESTMODE = True
        table.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        result = table.interaction_plot('ERROR',
                                        'TIMEOFDAY',
                                        seplines='COURSE',
                                        sepxplots='MODEL',
                                        yerr=1.,
                                        output_dir='output')

        # Exact-match fields.
        for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                    'fname', 'maintitle', 'subplot_titles'):
            self.assertEqual(result[key], expected[key])

        # Scalar floats.
        for key in ('ymin', 'ymax'):
            self.assertAlmostEqual(result[key], expected[key])

        # Nested float data, compared element-wise after flattening.
        for key in ('y', 'yerr'):
            pairs = zip(np.array(result[key]).flat,
                        np.array(expected[key]).flat)
            for actual, wanted in pairs:
                self.assertAlmostEqual(actual, wanted)

예제 #50

0

파일 보기

파일: test_stats_chisquare1way.py 프로젝트: yk/pyvttbl

    def test2(self):
        R = """\
Chi-Square: Single Factor

SUMMARY
             1        2        3        4        5    
=====================================================
Observed        7       20       23        9        0 
Expected   11.800   11.800   11.800   11.800   11.800 

CHI-SQUARE TESTS
                     Value    df       P     
============================================
Pearson Chi-Square   30.746    4   3.450e-06 
Likelihood Ratio         --   --          -- 
Observations             59                  

POST-HOC POWER
       Measure                 
==============================
Effect size w            0.722 
Non-centrality lambda   30.746 
Critical Chi-Square      9.488 
Power                    0.998 """

        df = DataFrame()
        df.read_tbl('data/chi_test.csv')
        X = df.chisquare1way('RESULT', {
            1: 11.8,
            2: 11.8,
            3: 11.8,
            4: 11.8,
            5: 11.8
        })

        self.assertEqual(str(X), R)

예제 #51

0

파일 보기

파일: test_stats_ttest_paired.py 프로젝트: taotaohe/pyvttbl

    def test4(self):
        R = """t-Test: Paired Two Sample for means
                            PRE        POST   
=============================================
Mean                        87.250     87.083 
Variance                  1207.659   1166.629 
Observations                    12         12 
Pearson Correlation          0.995            
df                              11            
t Stat                       0.163            
alpha                        0.050            
P(T<=t) one-tail             0.437            
t Critical one-tail          2.201            
P(T<=t) two-tail             0.873            
t Critical two-tail          1.796            
P(T<=t) two-tail             0.873            
Effect size dz               0.047            
delta                        0.163            
Observed power one-tail      0.068            
Observed power two-tail      0.035            """
        df = DataFrame()
        df.read_tbl('data/example2_prepost.csv')
        D = df.ttest('PRE', 'POST', paired=True)
        self.assertEqual(str(D), R)

예제 #52

0

파일 보기

    def test3(self):
        """Tuple column names come back from keys() equal to the originals."""
        tupa = ('a1', 'a2', 'a3')
        tupb = ('a1', 'b2', 'b3')

        table = DataFrame()
        for a_val, b_val in ((34, 1), (34, 2), (42, 3)):
            table.insert([(tupa, a_val), (tupb, b_val)])

        first_key, second_key = table.keys()

        self.assertEqual(first_key, tupa)
        self.assertEqual(second_key, tupb)

예제 #53

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test01(self):
        """confidence interval error bars specified"""
        expected = {
            'aggregate': 'ci',
            'clevels': [1],
            'fname':
            'output\\interaction_plot(WORDS~AGE_X_CONDITION,yerr=95% ci).png',
            'maintitle': 'WORDS by AGE * CONDITION',
            'numcols': 1,
            'numrows': 1,
            'rlevels': [1],
            'subplot_titles': [''],
            'xmaxs': [1.5],
            'xmins': [-0.5],
            'y': [[[11.0, 14.8], [7.0, 6.5], [13.4, 17.6], [12.0, 19.3],
                   [6.9, 7.6]]],
            'yerr': [[]],
            'ymin': 0.0,
            'ymax': 27.183257964740832,
        }

        # a simple plot
        table = DataFrame()
        table.TESTMODE = True
        table.read_tbl('data/words~ageXcondition.csv')
        result = table.interaction_plot('WORDS',
                                        'AGE',
                                        seplines='CONDITION',
                                        output_dir='output',
                                        yerr='ci')

        # Exact-match fields.
        for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                    'fname', 'maintitle', 'subplot_titles'):
            self.assertEqual(result[key], expected[key])

        # Scalar floats.
        for key in ('ymin', 'ymax'):
            self.assertAlmostEqual(result[key], expected[key])

        # Nested float data, compared element-wise after flattening.
        for key in ('y', 'yerr'):
            pairs = zip(np.array(result[key]).flat,
                        np.array(expected[key]).flat)
            for actual, wanted in pairs:
                self.assertAlmostEqual(actual, wanted)

예제 #54

0

파일 보기

    def test1(self):
        """chi-square 2-way"""
        R = """\
Chi-Square: two Factor

SUMMARY
            Litter      Removed    Trash Can   Total 
====================================================
Countrol         385         477          41     903 
           (343.976)   (497.363)    (61.661)         
Message          290         499          80     869 
           (331.024)   (478.637)    (59.339)         
====================================================
Total            675         976         121    1772 

SYMMETRIC MEASURES
                          Value    Approx.  
                                    Sig.    
===========================================
Cramer's V                0.121   3.510e-07 
Contingency Coefficient   0.120   4.263e-07 
N of Valid Cases           1772             

CHI-SQUARE TESTS
                     Value    df       P     
============================================
Pearson Chi-Square   25.794    2   2.506e-06 
Likelihood Ratio     26.056    2   2.198e-06 
N of Valid Cases       1772                  

CHI-SQUARE POST-HOC POWER
       Measure                 
==============================
Effect size w            0.121 
Non-centrality lambda   25.794 
Critical Chi-Square      5.991 
Power                    0.997 """

        df = DataFrame()
        rfactors = ['Countrol'] * 903 + ['Message'] * 869
        cfactors = ['Trash Can'] * 41 + ['Litter'] * 385 + ['Removed'] * 477
        cfactors += ['Trash Can'] * 80 + ['Litter'] * 290 + ['Removed'] * 499

        x2 = ChiSquare2way()
        x2.run(rfactors, cfactors)
        self.assertEqual(str(x2), R)

예제 #55

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test4(self):
        """interaction_plot of WORDS by AGE with one x-subplot per CONDITION."""
        conditions = ['adjective', 'counting', 'imagery', 'intention',
                      'rhyming']
        expected = {
            'aggregate': None,
            'clevels': list(conditions),
            'fname': 'output\\interaction_plot(WORDS~AGE_X_CONDITION).png',
            'maintitle': 'WORDS by AGE * CONDITION',
            'numcols': 5,
            'numrows': 1,
            'rlevels': [1],
            'subplot_titles': list(conditions),
            'xmaxs': [1.5, 1.5, 1.5, 1.5, 1.5],
            'xmins': [-0.5, -0.5, -0.5, -0.5, -0.5],
            'y': [[11., 14.8], [7., 6.5], [13.4, 17.6], [12., 19.3],
                  [6.9, 7.6]],
            'yerr': [[], [], [], [], []],
            'ymax': 27.183257964740832,
            'ymin': 0.0,
        }

        # a simple plot
        table = DataFrame()
        table.TESTMODE = True
        table.read_tbl('data/words~ageXcondition.csv')
        result = table.interaction_plot('WORDS',
                                        'AGE',
                                        sepxplots='CONDITION',
                                        output_dir='output')

        # Exact-match fields.
        for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                    'fname', 'maintitle', 'subplot_titles'):
            self.assertEqual(result[key], expected[key])

        # Scalar floats.
        for key in ('ymin', 'ymax'):
            self.assertAlmostEqual(result[key], expected[key])

        # Nested float data, compared element-wise after flattening.
        for key in ('y', 'yerr'):
            pairs = zip(np.array(result[key]).flat,
                        np.array(expected[key]).flat)
            for actual, wanted in pairs:
                self.assertAlmostEqual(actual, wanted)

예제 #56

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test6(self):
        """interaction_plot with sepyplots and 'ci' error bars.

        With TESTMODE set, the mapping returned by ``interaction_plot`` is
        compared key by key against the reference values below.
        """
        R = {
            'aggregate': 'ci',
            'clevels': [1],
            'fname':
            'output\\interaction_plot(SUPPRESSION~CYCLE_X_PHASE,yerr=95% ci).png',
            'maintitle': 'SUPPRESSION by CYCLE * PHASE',
            'numcols': 1,
            'numrows': 2,
            'rlevels': ['I', 'II'],
            'subplot_titles': ['I', 'II'],
            # xmaxs / xmins are recorded here but not asserted below.
            'xmaxs': [4.1749999999999998, 4.1749999999999998],
            'xmins': [0.82499999999999996, 0.82499999999999996],
            'y': [[12.3375, 16.03333333, 16., 14.92916667],
                  [18.43333333, 25.64166667, 26.1375, 26.8125]],
            'yerr': [[3.18994762, 3.20528834, 3.26882751, 3.53477953],
                     [3.98429064, 4.5950803, 4.9514978, 4.97429769]],
            'ymax': 64.8719707118471,
            'ymin': 0.0,
        }

        # generate yerr
        df = DataFrame()
        df.TESTMODE = True
        # Forward slash: 'data\s...' relied on the invalid escape '\s' and
        # only resolved on Windows; the sibling tests already use 'data/'.
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        D = df.interaction_plot('SUPPRESSION',
                                'CYCLE',
                                sepyplots='PHASE',
                                yerr='ci',
                                output_dir='output')

        # Exact-match fields.
        for key in ('aggregate', 'clevels', 'rlevels', 'numcols', 'numrows',
                    'fname', 'maintitle', 'subplot_titles'):
            self.assertEqual(D[key], R[key])

        # Scalar floats.
        for key in ('ymin', 'ymax'):
            self.assertAlmostEqual(D[key], R[key])

        # Nested float data, compared element-wise after flattening.
        for key in ('y', 'yerr'):
            for d, r in zip(np.array(D[key]).flat, np.array(R[key]).flat):
                self.assertAlmostEqual(d, r)

예제 #57

0

파일 보기

파일: test_plotting_interaction_plot.py 프로젝트: taotaohe/pyvttbl

    def test31(self):
        """Smoke test: interaction_plot with seplines, sepxplots and sepyplots.

        No assertions are made on the result; the test passes as long as the
        call completes without raising.
        """

        # separate y plots and separate x plots
        df = DataFrame()
        df.TESTMODE = True
        # Forward slash: 'data\s...' relied on the invalid escape '\s' and
        # only resolved on Windows; the sibling tests already use 'data/'.
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')

        D = df.interaction_plot('SUPPRESSION',
                                'CYCLE',
                                seplines='AGE',
                                sepxplots='GROUP',
                                sepyplots='PHASE',
                                yerr='sem',
                                output_dir='output')

예제 #58

0

파일 보기

    def test22(self):
        """test with string keys and where condition"""
        table = DataFrame()
        table['1'] = list(range(100))
        table['2'] = ['bob'] * 100
        table['3'] = [i * 1.234232 for i in range(100)]
        table['4'] = ['bob'] * 50 + list(range(50))

        # Column '4' is left unshuffled so its second half stays non-'bob'.
        for key in ('1', '2', '3'):
            shuffle(table[key])

        first_two_keys = list(table.keys())[:2]
        table._build_sqlite3_tbl(first_two_keys, ['4 not in ("bob")'])

        table._execute('select * from TBL')
        # Returned rows are expected to line up with source rows 50 onward.
        for row, (first, second) in enumerate(table.cur, 50):
            self.assertEqual(first, table['1'][row])
            self.assertEqual(second, table['2'][row])

예제 #59

0

파일 보기

    def test3(self):
        """test with integer keys and tuple where condition"""
        table = DataFrame()
        table[1] = list(range(100))
        table[2] = ['bob'] * 100
        table[3] = [i * 1.234232 for i in range(100)]
        table[4] = ['bob'] * 50 + list(range(50))

        # Column 4 is left unshuffled so its second half stays non-'bob'.
        for key in (1, 2, 3):
            shuffle(table[key])

        first_two_keys = list(table.keys())[:2]
        table._build_sqlite3_tbl(first_two_keys, [(4, '!=', 'bob')])

        table._execute('select * from TBL')
        # Returned rows are expected to line up with source rows 50 onward.
        for row, (first, second) in enumerate(table.cur, 50):
            self.assertEqual(first, table[1][row])
            self.assertEqual(second, table[2][row])

예제 #60

0

파일 보기

파일: data_restructurer.py 프로젝트: yk/pyvttbl

def long2wide(in_fname,
              id,
              dvs,
              between=[],
              within=[],
              covariates=[],
              out_fname=None,
              nested=True):
    """Restructure a long-format table into wide format and write it as CSV.

    The table in ``in_fname`` is loaded into a ``DataFrame``.  The output
    has one column of sorted unique ``id`` values, one column per covariate
    and between factor (pivoted with the 'arbitrary' aggregate), and, for
    every dependent variable, the rows of each pivot over combinations of
    the ``within`` factors (aggregated with 'avg').

    Args:
        in_fname: path of the long-format table to read.
        id: label of the column identifying each unit; becomes the first
            output column.
        dvs: iterable of dependent-variable column labels.
        between: list of between-factor labels.
        within: list of within-factor labels.
        covariates: list of covariate labels.
        out_fname: output CSV path; 'wide_data.csv' when None.
        nested: when true, combinations of every size from 1 up to
            ``len(within)`` are pivoted; otherwise only the full-size
            combination.

    Returns:
        None.  The result is written to ``out_fname``.
    """
    # The list defaults above are only read, never mutated, so sharing
    # them between calls is harmless.

    # load in_fname into a PyvtTbl object
    print('reading "%s"...' % in_fname)
    cls = DataFrame()
    cls.read_tbl(in_fname)

    # loop through DVs and append within columns
    d = [sorted(set(cls[id]))]
    header = [id] + covariates + between

    for col in covariates + between:
        z = cls.pivot(col, cols=[id], aggregate='arbitrary')
        d.extend(list(z))

    # start controls whether nested factors are examined
    start = 1 if nested else len(within)

    for dv in dvs:
        print('\ncollaborating %s' % dv)
        for j in _xrange(start, len(within) + 1):

            for factors in _xunique_combinations(within, j):
                # Deliberately prints the tuple itself, as the original did.
                print(('  pivoting', factors, '...'))
                z = cls.pivot(dv, rows=factors, cols=[id], aggregate='avg')
                d.extend(list(z))

                # process headers
                for names in z.rnames:
                    h = '_'.join('%s.%s' % (f, str(c)) for (f, c) in names)
                    header.append('%s__%s' % (dv, h))

    # Now we can write the data
    if out_fname is None:
        out_fname = 'wide_data.csv'

    # csv.writer needs a text-mode file on Python 3; newline='' lets the
    # csv module control line endings itself.
    with open(out_fname, 'w', newline='') as f:
        wtr = csv.writer(f)
        wtr.writerow([n.upper() for n in header])
        wtr.writerows(zip(*d))  # transpose and write