def test2(self):
    """Compare the frame left by where_update(ERROR = 10) with a hand-built one.

    Both frames are compared through their repr() strings.
    """
    expected = DataFrame([
        ('SUBJECT', [1, 2]),
        ('TIMEOFDAY', ['T1', 'T1']),
        ('COURSE', ['C1', 'C2']),
        ('MODEL', ['M1', 'M1']),
        ('ERROR', [10, 10]),
    ])

    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    frame.where_update([('ERROR', '=', 10)])

    self.assertEqual(repr(frame), repr(expected))
def test6(self):
    """Compare the frame left by a two-condition where_update with a hand-built one.

    The conditions are COURSE = ['C1'] and TIMEOFDAY in ['T1', 'T2']; the
    frames are compared through their repr() strings.
    """
    expected = DataFrame([
        ('SUBJECT',
         [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3]),
        ('TIMEOFDAY',
         ['T1', 'T1', 'T1', 'T2', 'T2', 'T2', 'T2', 'T2', 'T2',
          'T1', 'T1', 'T1', 'T2', 'T2', 'T2']),
        ('COURSE',
         ['C1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1', 'C1',
          'C1', 'C1', 'C1', 'C1', 'C1', 'C1']),
        ('MODEL',
         ['M1', 'M2', 'M3', 'M1', 'M2', 'M3', 'M1', 'M2', 'M3',
          'M1', 'M2', 'M3', 'M1', 'M2', 'M3']),
        ('ERROR',
         [10, 8, 6, 5, 4, 3, 4, 3, 3, 8, 7, 4, 4, 1, 2]),
    ])

    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    conditions = [
        ('COURSE', '=', ['C1']),
        ('TIMEOFDAY', 'in', ["T1", "T2"]),
    ]
    frame.where_update(conditions)

    self.assertEqual(repr(frame), repr(expected))
def test0(self):
    """Compare the frame returned by where('ERROR = 10') with a hand-built one.

    The frames are compared through their repr() strings.
    """
    # The u'' prefixes are kept on purpose: they change repr() on Python 2.
    expected = DataFrame([
        ('SUBJECT', [1, 2]),
        ('TIMEOFDAY', [u'T1', u'T1']),
        ('COURSE', [u'C1', u'C2']),
        ('MODEL', [u'M1', u'M1']),
        ('ERROR', [10, 10]),
    ])

    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    selected = frame.where('ERROR = 10')

    self.assertEqual(repr(selected), repr(expected))
def test1(self):
    """attach() is expected to raise when the two frames differ in columns."""
    source = 'data/words~ageXcondition.csv'

    self.df1 = DataFrame()
    self.df2 = DataFrame()
    self.df1.read_tbl(source)
    self.df2.read_tbl(source)

    # Give df1 one column that df2 does not have.
    self.df1['EXTRA'] = [5 for _ in self.df1['AGE']]

    with self.assertRaises(Exception) as raised:
        self.df1.attach(self.df2)

    # Checked after the with-block so the comparison actually runs.
    self.assertEqual(str(raised.exception),
                     'self and other must have the same columns')
# test00: writes a small CSV literal to 'test.csv', then loads
# 'data/skiptest.csv' with skip=4 and checks that columns x, y and z,
# concatenated in that order, equal 1..12.
# NOTE(review): this definition is flattened onto one physical line, so the
# blank lines inside the triple-quoted CSV text (which the inline comment says
# must not be altered) cannot be recovered here; restore the layout from
# version control instead of re-wrapping it by hand.
# NOTE(review): the file written ('test.csv') is not the file read
# ('data/skiptest.csv') -- confirm which one is intended.
# NOTE(review): the handle is opened with 'wb' but receives a text literal;
# on Python 3 that raises TypeError -- confirm the target interpreter.
def test00(self): # skip 4 lines # DON'T MESS WITH THE SPACING with open('test.csv', 'wb') as f: f.write(""" x,y,z 1,5,9 2,6,10 3,7,11 4,8,12""") self.df = DataFrame() self.df.read_tbl('data/skiptest.csv', skip=4) D = list(self.df['x']) + \ list(self.df['y']) + \ list(self.df['z']) R = list(range(1, 13)) for (d, r) in zip(D, R): self.assertAlmostEqual(d, r)
# test11: loads the *_MISSING.csv error data set, renders
# str(df.descriptives('ERROR')) and compares it with the expected text table R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected table R are lost, so the
# literal must be restored from version control before the test can pass.
def test11(self): df = DataFrame() df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv') D = str(df.descriptives('ERROR')) R = """\ Descriptive Statistics ERROR ========================== count 48.000 mean 3.896 mode 3.000 var 5.797 stdev 2.408 sem 0.348 rms 4.567 min 0.000 Q1 2.000 median 3.000 Q3 5.000 max 10.000 range 10.000 95ci_lower 3.215 95ci_upper 4.577 """ self.assertEqual(D, R)
def test1(self):
    """Check the dict returned by box_plot('WORDS', ['AGE']) in TESTMODE.

    'fname', 'maintitle' and 'xlabels' are compared exactly; the values
    under 'd' are flattened and compared element by element.
    """
    first_group = np.array([
        9, 8, 6, 8, 10, 4, 6, 5, 7, 7,
        7, 9, 6, 6, 6, 11, 6, 3, 8, 7,
        11, 13, 8, 6, 14, 11, 13, 13, 10, 11,
        12, 11, 16, 11, 9, 23, 12, 10, 19, 11,
        10, 19, 14, 5, 10, 11, 14, 15, 11, 11,
    ])
    second_group = np.array([
        8, 6, 4, 6, 7, 6, 5, 7, 9, 7,
        10, 7, 8, 10, 4, 7, 10, 6, 7, 7,
        14, 11, 18, 14, 13, 22, 17, 16, 12, 11,
        20, 16, 16, 15, 18, 16, 20, 22, 14, 19,
        21, 19, 17, 15, 22, 16, 22, 22, 18, 21,
    ])
    expected = {
        'd': [first_group, second_group],
        'fname': 'output\\box(WORDS~AGE).png',
        'maintitle': 'WORDS by AGE',
        'xlabels': ['AGE = old', 'AGE = young'],
    }

    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/words~ageXcondition.csv')
    observed = frame.box_plot('WORDS', ['AGE'], output_dir='output')

    for key in ('fname', 'maintitle', 'xlabels'):
        self.assertEqual(observed[key], expected[key])

    observed_values = np.array(observed['d']).flat
    expected_values = np.array(expected['d']).flat
    for got, want in zip(observed_values, expected_values):
        self.assertAlmostEqual(got, want)
def test1(self):
    """A pivot table rebuilt from its own repr() must repr() identically."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY', 'MODEL'], ['COURSE'])

    # eval() is applied only to the repr of an object this test created.
    rebuilt = eval(repr(table))
    self.assertEqual(repr(rebuilt), repr(table))
def test1(self):
    """insert() with a plain list is expected to raise with a fixed message."""
    frame = DataFrame()

    with self.assertRaises(Exception) as raised:
        frame.insert([1, 2, 3, 4])

    self.assertEqual(str(raised.exception), 'row must be mappable type')
def test2(self):
    """Run Anova on WORDS with AGE and CONDITION as between factors.

    Nothing is asserted; the test only requires the run to complete.
    """
    # Between-Subjects test
    data_file = 'words~ageXcondition.csv'

    frame = DataFrame()
    frame.read_tbl(data_file)

    analysis = Anova()
    analysis.run(frame, 'WORDS', bfactors=['AGE', 'CONDITION'])
def test01(self):
    """scatter_plot without a trend argument should report 'trend' as None."""
    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/iqbrainsize.txt', delimiter='\t')

    plot_info = frame.scatter_plot('TOTVOL', 'FIQ', output_dir='output')

    self.assertEqual(None, plot_info['trend'])
def do_anovas(some_df, variable):
    """Build a one-way ANOVA of ``adj_diffs`` grouped by the chosen column.

    ``variable`` selects the grouping column of ``some_df``:
    'qp' -> 'qualification_performance', 'ed' -> 'education_level',
    'fp' -> 'fps'. Any other value returns None.

    Returns the object produced by ``anova1way`` with an extra
    'omega-sq' entry filled in from ``get_w``.
    """
    frame = DataFrame()

    if variable == 'qp':
        group_column = 'qualification_performance'
    elif variable == 'ed':
        group_column = 'education_level'
    elif variable == 'fp':
        group_column = 'fps'
    else:
        return None

    # Grouping labels first, then the measured values.
    frame['qual'] = str_list(some_df[group_column])
    frame['vals'] = int_list(some_df['adj_diffs'])

    result = frame.anova1way('vals', 'qual')
    result['omega-sq'] = get_w(result)
    return result
# test1: loads the suppression data set, runs
# df.ttest('SUPPRESSION', pop_mean=17.) and compares str() of the result with
# the expected text table R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected table R are lost, so the
# literal must be restored from version control before the test can pass.
def test1(self): R = """\ t-Test: One Sample for means SUPPRESSION ===================================== Sample Mean 19.541 Hypothesized Pop. Mean 17 Variance 228.326 Observations 384 df 383 t Stat 3.295 alpha 0.050 P(T<=t) one-tail 5.384e-04 t Critical one-tail 1.966 P(T<=t) two-tail 0.001 t Critical two-tail 1.649 P(T<=t) two-tail 0.001 Effect size d 0.168 delta 3.295 Observed power one-tail 0.950 Observed power two-tail 0.908 """ df = DataFrame() df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv') D = df.ttest('SUPPRESSION', pop_mean=17.) self.assertEqual(str(D), R)
def test03(self):
    """Read a CSV whose three columns share the label 'x'.

    The read is expected to emit a RuntimeWarning, and the columns are then
    read back as 'x', 'x_2' and 'x_3'; concatenated they must equal 1..12.
    """
    # duplicate labels
    # The file is opened in binary mode, so the payload is a bytes literal:
    # writing text to a 'wb' handle raises TypeError on Python 3.
    # Layout: one leading blank line (consumed by skip=1), the label row,
    # then four data rows with no trailing newline.
    with open('test.csv', 'wb') as f:
        f.write(b"\nx,x,x\n1,5,9\n2,6,10\n3,7,11\n4,8,12")

    self.df = DataFrame()

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")

        # Trigger a warning.
        self.df.read_tbl('test.csv', skip=1, labels=True)

        # assertTrue rather than a bare assert, which is stripped under -O.
        self.assertTrue(issubclass(w[-1].category, RuntimeWarning))

    D = (list(self.df['x']) +
         list(self.df['x_2']) +
         list(self.df['x_3']))
    R = list(range(1, 13))

    for (d, r) in zip(D, R):
        self.assertAlmostEqual(d, r)
# test2: runs df.chisquare1way('RESULT', {...}) with an explicit expected
# count of 11.8 for each of the categories 1..5 and compares str() of the
# result with the expected text table R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected table R are lost, so the
# literal must be restored from version control before the test can pass.
# NOTE(review): the data file is opened as 'chi_test.csv', while other tests
# in this file read from a 'data/' directory -- confirm the path.
def test2(self): R = """Chi-Square: Single Factor SUMMARY 1 2 3 4 5 ===================================================== Observed 7 20 23 9 0 Expected 11.800 11.800 11.800 11.800 11.800 CHI-SQUARE TESTS Value df P ============================================= Pearson Chi-Square 30.746 4 3.450e-06 Likelihood Ratio nan nan nan Observations 59 """ df = DataFrame() df.read_tbl('chi_test.csv') X = df.chisquare1way('RESULT', { 1: 11.8, 2: 11.8, 3: 11.8, 4: 11.8, 5: 11.8 }) self.assertEqual(str(X), R)
# test4: inserts three rows, pivots 'id' with rows=['member'] and
# cols=['Name', 'Year'] using aggregate='count' and method='full', and
# compares str() of the pivot with the expected text table R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected table R are lost, so the
# literal must be restored from version control before the test can pass.
# NOTE(review): the docstring says "invalid row", but here the combination
# that never occurs in the inserted data (name2 / 2010) lies on the column
# axis -- the wording looks copied from the rows=['Name', 'Year'] variant.
def test4(self): """method='full', aggregate=count, invalid row""" R = """\ count(id) member Name=name1, Name=name1, Name=name2, Name=name2, Total Year=2010 Year=2011 Year=2010 Year=2011 ====================================================================== N 0 1 -- 0 1 Y 1 0 -- 1 2 ====================================================================== Total 1 1 -- 1 3 """ df = DataFrame() df.insert({'id': 0, 'Name': 'name1', 'Year': 2010, 'member': 'Y'}) df.insert({'id': 1, 'Name': 'name1', 'Year': 2011, 'member': 'N'}) df.insert({'id': 2, 'Name': 'name2', 'Year': 2011, 'member': 'Y'}) my_pivot = df.pivot('id', rows=['member'], cols=['Name', 'Year'], aggregate='count', method='full') self.assertEqual(R, str(my_pivot))
def test1(self):
    """Run a within-subjects anova on ERROR and write the HTML report.

    Nothing is asserted; the report name is the data file name with its
    last four characters replaced by 'RESULTS.htm'.
    """
    data_file = 'error~subjectXtimeofdayXcourseXmodel.csv'

    frame = DataFrame()
    frame.read_tbl(data_file)

    # The transform='windsor05' argument is deliberately left disabled.
    analysis = frame.anova('ERROR',
                           wfactors=['TIMEOFDAY', 'COURSE', 'MODEL'])

    report_name = data_file[:-4] + 'RESULTS.htm'
    analysis.output2html(report_name)
# test3: inserts three rows, pivots 'id' with rows=['Name', 'Year'] and
# cols=['member'] using aggregate='count' and method='full', and compares
# str() of the pivot with the expected text table R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected table R are lost, so the
# literal must be restored from version control before the test can pass.
def test3(self): """method='full', aggregate=count, invalid row""" R = """\ count(id) Name Year member=N member=Y Total ========================================== name1 2010 0 1 1 name1 2011 1 0 1 name2 2010 -- -- -- name2 2011 0 1 1 ========================================== Total 1 2 3 """ df = DataFrame() df.insert({'id': 0, 'Name': 'name1', 'Year': 2010, 'member': 'Y'}) df.insert({'id': 1, 'Name': 'name1', 'Year': 2011, 'member': 'N'}) df.insert({'id': 2, 'Name': 'name2', 'Year': 2011, 'member': 'Y'}) my_pivot = df.pivot('id', rows=['Name', 'Year'], cols=['member'], aggregate='count', method='full') self.assertEqual(R, str(my_pivot))
# test1: fills columns A, B and C with 21 literal values each, runs
# df.correlation(['A','B','C'], coefficient='spearman') and compares str() of
# the result with the expected text report R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected report R are lost, so the
# literal must be restored from version control before the test can pass.
def test1(self): R="""Bivariate Correlations A B C ====================================================== A spearman 1 0.958 -0.924 Sig (2-tailed) . 9.699e-12 2.259e-09 N 21 21 21 ------------------------------------------------------ B spearman 0.958 1 -0.890 Sig (2-tailed) 9.699e-12 . 0.000 N 21 21 21 ------------------------------------------------------ C spearman -0.924 -0.890 1 Sig (2-tailed) 2.259e-09 0.000 . N 21 21 21 Larzelere and Mulaik Significance Testing Pair i Correlation P alpha/(k-i+1) Sig. ============================================================ A vs. B 1 0.958 9.699e-12 0.017 ** A vs. C 2 0.924 2.259e-09 0.025 ** B vs. C 3 0.890 6.850e-08 0.050 ** """ df=DataFrame() df['A']=[24,61,59,46,43,44,52,43,58,67,62,57,71,49,54,43,53,57,49,56,33] df['B']=[42.93472681237495, 78.87307334936268, 75.37292628918023, 65.49076317291956, 55.55965179772366, 56.777730638998236, 62.19451880792437, 54.73710611356715, 72.10021832823149, 85.94377749485642, 78.2087578930983, 72.01681829338037, 84.27889316830063, 60.20516982367225, 65.6276497088971, 62.36549856901088, 69.18772114281175, 67.00548667483324, 59.042687027269466, 71.99214593063917, 45.00831155783992] df['C']=[-53.05540625388731, -96.33996451998567, -92.32465861908086, -70.90536432779966, -55.953777697739255, -74.12814626217357, -75.89188834814621, -64.24093256012688, -89.62208010083313, -87.41075066046812, -80.40932820298143, -77.99906284144805, -95.31607277596169, -61.672429800914486, -85.26088499198657, -63.4402296673869, -74.84950736563589, -85.00433219746624, -71.5901436929124, -76.43243666219388, -48.01082320924727] cor=df.correlation(['A','B','C'],coefficient='spearman') self.assertEqual(str(cor),R)
def test2(self):
    """Assigning a bare int as a column is expected to raise TypeError."""
    frame = DataFrame()

    with self.assertRaises(TypeError) as raised:
        frame['DUM'] = 42

    self.assertEqual(str(raised.exception),
                     "'int' object is not iterable")
def test5(self):
    """Time the assignment of two 2560-element columns and print the result.

    Nothing is asserted; the elapsed time in seconds is written to stdout.
    An earlier row-by-row ``insert`` benchmark that had been commented out,
    together with the imports and locals only it used, has been removed.
    """
    from pyvttbl import DataFrame
    import time

    # time.clock() was removed in Python 3.8; prefer perf_counter() and fall
    # back to clock() only on interpreters that predate it.
    try:
        timer = time.perf_counter
    except AttributeError:
        timer = time.clock

    dt = DataFrame()
    start = timer()

    dt['A'] = range(2560)
    # range instead of xrange: xrange does not exist on Python 3.
    dt['B'] = [i * i for i in range(2560)]

    now = timer()
    print(now - start)
def test_kn(self):
    """Overwriting column 'X' must leave the key order unchanged."""
    frame = DataFrame()
    frame.read_tbl('data/example.csv')

    # Replace every value of the existing column with the constant 23.
    frame['X'] = [23] * len(frame['X'])

    self.assertEqual(frame.keys(), ['CASE', 'TIME', 'CONDITION', 'X'])
def setUp(self):
    """Load 'data/words~ageXcondition.csv' into ``self.df`` for each test.

    A local dict ``D`` holding 100 literal values for each of SUBJECT, AGE,
    CONDITION and WORDS used to be built here but was never read, so it has
    been removed as dead code.
    """
    self.df = DataFrame()
    self.df.read_tbl('data/words~ageXcondition.csv')
def test0(self):
    """Check one cell of the ERROR pivot (TIMEOFDAY rows, COURSE columns)."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')

    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

    # Cell [1, 0] is compared to five decimal places.
    self.assertAlmostEqual(3.22222222222, table[1, 0], 5)
def test0(self):
    """Check the dict returned by box_plot('WORDS') in TESTMODE.

    'fname', 'maintitle' and 'val' are compared exactly; the values under
    'd' are flattened and compared element by element.
    """
    expected_values = [
        9.0, 8.0, 6.0, 8.0, 10.0, 4.0, 6.0, 5.0, 7.0, 7.0,
        7.0, 9.0, 6.0, 6.0, 6.0, 11.0, 6.0, 3.0, 8.0, 7.0,
        11.0, 13.0, 8.0, 6.0, 14.0, 11.0, 13.0, 13.0, 10.0, 11.0,
        12.0, 11.0, 16.0, 11.0, 9.0, 23.0, 12.0, 10.0, 19.0, 11.0,
        10.0, 19.0, 14.0, 5.0, 10.0, 11.0, 14.0, 15.0, 11.0, 11.0,
        8.0, 6.0, 4.0, 6.0, 7.0, 6.0, 5.0, 7.0, 9.0, 7.0,
        10.0, 7.0, 8.0, 10.0, 4.0, 7.0, 10.0, 6.0, 7.0, 7.0,
        14.0, 11.0, 18.0, 14.0, 13.0, 22.0, 17.0, 16.0, 12.0, 11.0,
        20.0, 16.0, 16.0, 15.0, 18.0, 16.0, 20.0, 22.0, 14.0, 19.0,
        21.0, 19.0, 17.0, 15.0, 22.0, 16.0, 22.0, 22.0, 18.0, 21.0,
    ]
    expected = {
        'd': expected_values,
        'fname': 'output\\box(WORDS).png',
        'maintitle': 'WORDS',
        'val': 'WORDS',
    }

    frame = DataFrame()
    frame.TESTMODE = True
    frame.read_tbl('data/words~ageXcondition.csv')
    observed = frame.box_plot('WORDS', output_dir='output')

    for key in ('fname', 'maintitle', 'val'):
        self.assertEqual(observed[key], expected[key])

    observed_flat = np.array(observed['d']).flat
    expected_flat = np.array(expected['d']).flat
    for got, want in zip(observed_flat, expected_flat):
        self.assertAlmostEqual(got, want)
def test2(self):
    """Insert 20 (A, B) rows, sort by ['B', 'A'], and check both columns."""
    expected = {
        'A': [
            -8.0, -7.0, -3.0, -2.0, -1.0, 1.0, 2.0, 3.0, 4.0, 9.0,
            -10.0, -9.0, -6.0, -5.0, -4.0, 0.0, 5.0, 6.0, 7.0, 8.0,
        ],
        'B': [
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
            2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
        ],
    }
    a_values = [
        4, 8, 1, 5, -7, -5, 9, 7, -8, -10,
        -1, -4, 3, 0., -2, 6, 2, -9, -3, -6,
    ]
    b_values = [1, 2, 1, 2, 1, 2, 1, 2, 1, 2,
                1, 2, 1, 2, 1, 2, 1, 2, 1, 2]

    frame = DataFrame()
    for a_item, b_item in zip(a_values, b_values):
        frame.insert({'A': a_item, 'B': b_item})

    frame.sort(['B', 'A'])

    # Column 'A' is checked first, then 'B'.
    for column in ('A', 'B'):
        for got, want in zip(frame[column], expected[column]):
            self.assertAlmostEqual(got, want)
def test1(self):
    """Compare every statistic from descriptives('ERROR') with stored values."""
    expected = Descriptives(
        [
            ('count', 48.0),
            ('mean', 3.8958333333333335),
            ('mode', 3.0),
            ('var', 5.797429078014184),
            ('stdev', 2.4077850979716158),
            ('sem', 0.34753384361617046),
            ('rms', 4.566636252940086),
            ('min', 0.0),
            ('Q1', 2.0),
            ('median', 3.0),
            ('Q3', 5.0),
            ('max', 10.0),
            ('range', 10.0),
            ('95ci_lower', 3.2146669998456394),
            ('95ci_upper', 4.5769996668210275),
        ],
        cname='ERROR')

    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    observed = frame.descriptives('ERROR')

    # Keys are taken from the computed result, not from the stored values.
    for stat in list(observed.keys()):
        self.assertAlmostEqual(observed[stat], expected[stat])
# test1: loads 'data/chi_test.csv', runs df.chisquare1way('RESULT') with no
# explicit expected counts and compares str() of the result with the expected
# text report R.
# NOTE(review): this definition is flattened onto one physical line; the line
# breaks and column padding inside the expected report R are lost, so the
# literal must be restored from version control before the test can pass.
def test1(self): R = """\ Chi-Square: Single Factor SUMMARY 1 2 3 4 ============================================ Observed 7 20 23 9 Expected 14.750 14.750 14.750 14.750 CHI-SQUARE TESTS Value df P ======================================== Pearson Chi-Square 12.797 3 0.005 Likelihood Ratio 13.288 3 0.004 Observations 59 POST-HOC POWER Measure ============================== Effect size w 0.466 Non-centrality lambda 12.797 Critical Chi-Square 7.815 Power 0.865 """ df = DataFrame() df.read_tbl('data/chi_test.csv') X = df.chisquare1way('RESULT') self.assertEqual(str(X), R)
def test01(self):
    """repr test: descriptives('WORDS') rebuilt from its repr() keeps its values."""
    expected = Descriptives(
        [
            ('count', 100.0),
            ('mean', 11.61),
            ('mode', 11.0),
            ('var', 26.947373737373752),
            ('stdev', 5.191085988246944),
            ('sem', 0.5191085988246944),
            ('rms', 12.707084638106414),
            ('min', 3.0),
            ('Q1', 7.0),
            ('median', 11.0),
            ('Q3', 15.5),
            ('max', 23.0),
            ('range', 20.0),
            ('95ci_lower', 10.592547146303598),
            ('95ci_upper', 12.6274528536964),
        ],
        cname='WORDS')

    frame = DataFrame()
    frame.read_tbl('data/words~ageXcondition.csv')

    # eval() is applied only to the repr of an object this test created.
    rebuilt = eval(repr(frame.descriptives('WORDS')))

    for stat in list(rebuilt.keys()):
        self.assertAlmostEqual(rebuilt[stat], expected[stat])
# test07: writes a CSV whose labels contain spaces to 'test.csv', reads it
# back with skip=1 and labels=True, prints the frame and the type of each
# value in column 'y 3', then compares str() of every value in columns
# 'y 1', 'y 2', 'y 3' with the expected list (one entry is a numpy masked
# constant, for the empty cell in the second data row).
# NOTE(review): this definition is flattened onto one physical line; the exact
# spaces around the labels and the line breaks inside the CSV literal are the
# point of the test and cannot be recovered here -- restore the literal from
# version control instead of re-wrapping it by hand.
# NOTE(review): the handle is opened with 'wb' but receives a text literal;
# on Python 3 that raises TypeError -- confirm the target interpreter.
def test07(self): # labels have spaces with open('test.csv', 'wb') as f: f.write(""" y 1, y 2 , y 3 1,5,9 2,6, 3,7,11 4,8,12""") self.df = DataFrame() self.df.read_tbl('test.csv', skip=1, labels=True) print((self.df)) for z in self.df['y 3']: print((type(z))) D = list(self.df['y 1']) + \ list(self.df['y 2']) + \ list(self.df['y 3']) R = [1, 2, 3, 4, 5, 6, 7, 8, 9, np.ma.core.MaskedConstant(), 11, 12] for (d, r) in zip(D, R): self.assertEqual(str(d), str(r))