class Test_pivot_2(unittest.TestCase):
    """Rendering tests for pivots of WORDS over the CONDITION and AGE factors."""

    def setUp(self):
        """Load the fixture CSV into a fresh DataFrame before each test."""
        # Only the CSV file feeds the tests; the literal dict that used to be
        # assigned to a local here was never read, so it is not rebuilt.
        self.df = DataFrame()
        self.df.read_tbl('data/words~ageXcondition.csv')

    def test12(self):
        """Pivot WORDS with CONDITION and AGE as row factors and check str()."""
        # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
        R = """\ avg(WORDS) CONDITION AGE Value ========================== adjective old 11 adjective young 14.800 counting old 7 counting young 6.500 imagery old 13.400 imagery young 17.600 intention old 12 intention young 19.300 rhyming old 6.900 rhyming young 7.600 ========================== Total 11.610 """
        D = self.df.pivot('WORDS', rows=['CONDITION', 'AGE'])
        # Iterate the table once; the resulting list is intentionally unused.
        list(D)
        # verify the values in the table
        # assertEqual replaces the failUnlessEqual alias, which newer
        # unittest versions no longer provide.
        self.assertEqual(str(D), R)

    def test13(self):
        """Pivot WORDS with CONDITION and AGE as column factors and check str()."""
        # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
        R = """\ avg(WORDS) CONDITION=adjective, CONDITION=adjective, CONDITION=counting, CONDITION=counting, CONDITION=imagery, CONDITION=imagery, CONDITION=intention, CONDITION=intention, CONDITION=rhyming, CONDITION=rhyming, Total AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ================================================================================================================================================================================================================================== 11 14.800 7 6.500 13.400 17.600 12 19.300 6.900 7.600 11.610 """
        D = self.df.pivot('WORDS', cols=['CONDITION', 'AGE'])
        # verify the values in the table
        self.assertEqual(str(D), R)
def long2wide(in_fname, id, dvs, between=[], within=[], covariates=[],
              out_fname=None, nested=True):
    """Read a long-format table and write it out as a wide-format CSV file.

    Parameters:
        in_fname: path of the table to read (passed to DataFrame.read_tbl).
        id: name of the column whose sorted unique values form the first
            output column.
        dvs: names of the dependent-variable columns to pivot.
        between: column names emitted once each, after the covariates.
        within: factor names; combinations of them become the pivot rows
            for every dependent variable.
        covariates: column names emitted once each, right after ``id``.
        out_fname: output path; 'wide_data.csv' when None.
        nested: when true, factor combinations of every size from 1 up to
            len(within) are pivoted; otherwise only the full-size ones.

    The list defaults are only read and concatenated, never mutated.
    """
    import sys

    # load in_fname into a DataFrame object
    print('reading "%s"...' % in_fname)
    cls = DataFrame()
    cls.read_tbl(in_fname)

    # the first output column holds the sorted unique id values
    d = [sorted(set(cls[id]))]
    header = [id] + covariates + between

    for col in covariates + between:
        z = cls.pivot(col, cols=[id], aggregate='arbitrary')
        d.extend(list(z))

    # start controls whether nested factors are examined
    start = 1 if nested else len(within)

    for dv in dvs:
        print('\ncollaborating %s' % dv)
        for j in _xrange(start, len(within) + 1):
            for factors in _xunique_combinations(within, j):
                # Format explicitly so the message reads the same whether
                # print is a statement or a function.
                print(' pivoting %s ...' % (factors,))
                z = cls.pivot(dv, rows=factors, cols=[id], aggregate='avg')
                d.extend(list(z))

                # process headers: one name per pivot row just appended.
                # NOTE(review): nesting reconstructed from the data flow
                # (header entries must line up with the rows added to d).
                for names in z.rnames:
                    h = '_'.join('%s.%s' % (f, str(c)) for (f, c) in names)
                    header.append('%s__%s' % (dv, h))

    # Now we can write the data
    if out_fname is None:
        out_fname = 'wide_data.csv'

    # csv.writer needs a text-mode file opened with newline='' on Python 3,
    # and a binary-mode file on Python 2.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': ''}
    else:
        open_kwargs = {'mode': 'wb'}

    with open(out_fname, **open_kwargs) as f:
        wtr = csv.writer(f)
        wtr.writerow([n.upper() for n in header])
        wtr.writerows(zip(*d))  # transpose and write
def test2(self):
    """Check that repr() of a filtered pivot table round-trips through eval()."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR',
                        ['MODEL', 'TIMEOFDAY'],
                        ['COURSE'],
                        where=['SUBJECT != 1'])
    # Rebuild the table from its own repr, then compare the two reprs.
    rebuilt = eval(repr(table))
    self.assertEqual(repr(rebuilt), repr(table))
def test4(self):
    """Count ids with method='full': member as rows, Name and Year as columns."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ count(id) member Name=name1, Name=name1, Name=name2, Name=name2, Total Year=2010 Year=2011 Year=2010 Year=2011 ====================================================================== N 0 1 -- 0 1 Y 1 0 -- 1 2 ====================================================================== Total 1 1 -- 1 3 """
    records = [
        {'id': 0, 'Name': 'name1', 'Year': 2010, 'member': 'Y'},
        {'id': 1, 'Name': 'name1', 'Year': 2011, 'member': 'N'},
        {'id': 2, 'Name': 'name2', 'Year': 2011, 'member': 'Y'},
    ]
    frame = DataFrame()
    for record in records:
        frame.insert(record)
    my_pivot = frame.pivot('id',
                           rows=['member'],
                           cols=['Name', 'Year'],
                           aggregate='count',
                           method='full')
    self.assertEqual(expected, str(my_pivot))
def test3(self):
    """Count ids with method='full': Name and Year as rows, member as columns."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ count(id) Name Year member=N member=Y Total ========================================== name1 2010 0 1 1 name1 2011 1 0 1 name2 2010 -- -- -- name2 2011 0 1 1 ========================================== Total 1 2 3 """
    records = [
        {'id': 0, 'Name': 'name1', 'Year': 2010, 'member': 'Y'},
        {'id': 1, 'Name': 'name1', 'Year': 2011, 'member': 'N'},
        {'id': 2, 'Name': 'name2', 'Year': 2011, 'member': 'Y'},
    ]
    frame = DataFrame()
    for record in records:
        frame.insert(record)
    my_pivot = frame.pivot('id',
                           rows=['Name', 'Year'],
                           cols=['member'],
                           aggregate='count',
                           method='full')
    self.assertEqual(expected, str(my_pivot))
def test0(self):
    """Read one cell of the TIMEOFDAY x COURSE pivot of ERROR by index."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    cell = table[1, 0]
    # compared to 5 decimal places
    self.assertAlmostEqual(3.22222222222, cell, 5)
def test4(self):
    """Collect ids with aggregate='tolist', method='full': member rows, Name/Year columns."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ tolist(id) member Name=name1, Name=name1, Name=name2, Name=name2, Year=2010 Year=2011 Year=2010 Year=2011 ================================================================== N [None, None] [1.0, 1.0] [None, None] [None, None] Y [0.0, 0.0] [None, None] [None, None] [2.0, 2.0] """
    members = [
        (0, 'name1', 2010, 'Y'),
        (1, 'name1', 2011, 'N'),
        (2, 'name2', 2011, 'Y'),
    ]
    frame = DataFrame()
    # Insert the same three records once per repetition, rep 1 first.
    for rep in (1, 2):
        for ident, name, year, member in members:
            frame.insert({'id': ident, 'Name': name, 'Year': year,
                          'member': member, 'rep': rep})
    my_pivot = frame.pivot('id',
                           rows=['member'],
                           cols=['Name', 'Year'],
                           aggregate='tolist',
                           method='full')
    self.assertEqual(expected, str(my_pivot))
def test3(self):
    """Collect ids with aggregate='tolist', method='full': Name/Year rows, member columns."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ tolist(id) Name Year member=N member=Y ========================================== name1 2010 [None, None] [0.0, 0.0] name1 2011 [1.0, 1.0] [None, None] name2 2010 [None, None] [None, None] name2 2011 [None, None] [2.0, 2.0] """
    members = [
        (0, 'name1', 2010, 'Y'),
        (1, 'name1', 2011, 'N'),
        (2, 'name2', 2011, 'Y'),
    ]
    frame = DataFrame()
    # Insert the same three records once per repetition, rep 1 first.
    for rep in (1, 2):
        for ident, name, year, member in members:
            frame.insert({'id': ident, 'Name': name, 'Year': year,
                          'member': member, 'rep': rep})
    my_pivot = frame.pivot('id',
                           rows=['Name', 'Year'],
                           cols=['member'],
                           aggregate='tolist',
                           method='full')
    self.assertEqual(expected, str(my_pivot))
def test0(self):
    """Read one cell of the TIMEOFDAY x COURSE pivot of ERROR by index."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    cell = table[1, 0]
    # compared to 5 decimal places
    self.assertAlmostEqual(3.22222222222, cell, 5)
def test1(self):
    """Check that repr() of a pivot table round-trips through eval()."""
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY', 'MODEL'], ['COURSE'])
    # Rebuild the table from its own repr, then compare the two reprs.
    rebuilt = eval(repr(table))
    self.assertEqual(repr(rebuilt), repr(table))
def export_csv_pivot(request, entidad=1, ano=None):
    """Return a pivot of consumption values as a downloadable CSV response.

    Parameters:
        request: the incoming request (not read by this view).
        entidad: primary key used to filter Consumo by ``entidad__pk``.
        ano: year used to filter Consumo; when None, the current year (as a
            string) is computed at call time. Computing it in the signature
            would freeze the year at import time.

    Returns:
        An HttpResponse with content type text/csv and an attachment
        Content-Disposition, whose body is the pivot table written as-is.
    """
    from collections import namedtuple

    if ano is None:
        ano = str(date.today().year)

    consumos = Consumo.objects.filter(entidad__pk=entidad, ano=ano)
    LineaDetalle = namedtuple(
        'LineaDetalle',
        [u'Año', "Mes", 'Local_o_Vehiculo', "Consumo", "Valor"])

    df = DataFrame()
    for c in consumos:
        # NOTE(review): 16 is presumably the content-type id of Local in
        # this deployment; every other id is looked up as a Vehiculo.
        if c.content_type.id == 16:
            modelo = Local
        else:
            modelo = Vehiculo
        denominacion = modelo.objects.get(pk=c.object_id).denominacion
        linea = LineaDetalle(c.ano,
                             c.mes,
                             denominacion.encode("utf-8"),
                             c.medida.denominacion.encode("utf-8"),
                             c.valor)
        df.insert(linea._asdict())

    pt = df.pivot("Valor", ['Local_o_Vehiculo', 'Consumo'], ['Mes'])

    # get the response object, this can be used as a stream.
    # content_type is the supported keyword; mimetype is no longer accepted
    # by current Django releases.
    response = HttpResponse(content_type='text/csv')
    # force download.
    response['Content-Disposition'] = 'attachment;filename=export.csv'
    response.write(pt)
    return response
def long2wide(in_fname, id, dvs, between=[], within=[], covariates=[],
              out_fname=None, nested=True):
    """Read a long-format table and write it out as a wide-format CSV file.

    Parameters:
        in_fname: path of the table to read (passed to DataFrame.read_tbl).
        id: name of the column whose sorted unique values form the first
            output column.
        dvs: names of the dependent-variable columns to pivot.
        between: column names emitted once each, after the covariates.
        within: factor names; combinations of them become the pivot rows
            for every dependent variable.
        covariates: column names emitted once each, right after ``id``.
        out_fname: output path; 'wide_data.csv' when None.
        nested: when true, factor combinations of every size from 1 up to
            len(within) are pivoted; otherwise only the full-size ones.

    The list defaults are only read and concatenated, never mutated.
    """
    import sys

    # load in_fname into a DataFrame object
    print('reading "%s"...' % in_fname)
    cls = DataFrame()
    cls.read_tbl(in_fname)

    # the first output column holds the sorted unique id values
    d = [sorted(set(cls[id]))]
    header = [id] + covariates + between

    for col in covariates + between:
        z = cls.pivot(col, cols=[id], aggregate='arbitrary')
        d.extend(list(z))

    # start controls whether nested factors are examined
    start = 1 if nested else len(within)

    for dv in dvs:
        print('\ncollaborating %s' % dv)
        for j in _xrange(start, len(within) + 1):
            for factors in _xunique_combinations(within, j):
                # Format explicitly so the message reads the same whether
                # print is a statement or a function.
                print(' pivoting %s ...' % (factors,))
                z = cls.pivot(dv, rows=factors, cols=[id], aggregate='avg')
                d.extend(list(z))

                # process headers: one name per pivot row just appended.
                # NOTE(review): nesting reconstructed from the data flow
                # (header entries must line up with the rows added to d).
                for names in z.rnames:
                    h = '_'.join('%s.%s' % (f, str(c)) for (f, c) in names)
                    header.append('%s__%s' % (dv, h))

    # Now we can write the data
    if out_fname is None:
        out_fname = 'wide_data.csv'

    # csv.writer needs a text-mode file opened with newline='' on Python 3,
    # and a binary-mode file on Python 2.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'w', 'newline': ''}
    else:
        open_kwargs = {'mode': 'wb'}

    with open(out_fname, **open_kwargs) as f:
        wtr = csv.writer(f)
        wtr.writerow([n.upper() for n in header])
        wtr.writerows(zip(*d))  # transpose and write
def test2(self):
    """Compare flatten() of a column-only pivot of ERROR with known averages."""
    expected = [
        7.16666666667,
        6.5,
        4.0,
        3.22222222222,
        2.88888888889,
        1.55555555556,
    ]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    flattened = table.flatten()
    for want, got in zip(expected, flattened):
        self.assertAlmostEqual(want, got)
def test2(self):
    """Compare the .flat values of a row-only pivot of ERROR, to 5 decimals."""
    expected = [
        7.16666666667,
        6.5,
        4.0,
        3.22222222222,
        2.88888888889,
        1.55555555556,
    ]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY', 'COURSE'])
    for want, got in zip(expected, table.flat):
        # both sides are formatted to five decimals before comparing
        self.assertEqual('%.5f' % want, '%.5f' % got)
def test2(self):
    """Convert a column-only pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ GROUP=AA, GROUP=AA, GROUP=AB, GROUP=AB, GROUP=LAB, GROUP=LAB, AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ======================================================================= 22.188 9.813 29.188 10.041 34.141 11.875 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION', cols=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test5(self):
    """Slice the first column out of the TIMEOFDAY x COURSE pivot and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 ===================== T1 7.167 T2 3.222 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    first_column = table[:, 0]
    self.assertEqual(expected, str(first_column))
def test7(self):
    """Slice row 0, columns 0-1 of the TIMEOFDAY x COURSE pivot and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 ================================= T1 7.167 6.500 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    piece = table[0, :2]
    self.assertEqual(expected, str(piece))
def test2(self):
    """Convert a column-only pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ GROUP=AA, GROUP=AA, GROUP=AB, GROUP=AB, GROUP=LAB, GROUP=LAB, AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ======================================================================= 22.188 9.813 29.188 10.041 34.141 11.875 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION', cols=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test2(self):
    """Compare the .flat values of a row-only pivot of ERROR, to 5 decimals."""
    expected = [7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889, 1.55555555556]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY', 'COURSE'])
    for want, got in zip(expected, table.flat):
        # both sides are formatted to five decimals before comparing
        self.assertEqual('%.5f' % want, '%.5f' % got)
def test3(self):
    """Check shape and flattened values of a column-only pivot of ERROR."""
    expected = np.array([
        [7.16666667, 6.5, 4., 3.22222222, 2.88888889, 1.55555556],
    ])
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    # shapes are compared through their string form
    self.assertEqual(str(expected.shape), str(table.shape))
    for want, got in zip(expected.flatten(), table.flatten()):
        self.assertAlmostEqual(want, got)
def test4(self):
    """Slice with [-1:] on the TIMEOFDAY x COURSE pivot and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T2 3.222 2.889 1.556 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    tail = table[-1:]
    self.assertEqual(expected, str(tail))
def test0(self):
    """Pivot a derived LOG10_X column, convert to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ TIME CONDITION=A CONDITION=B ================================= day 1.864 1.933 night 1.622 1.731 """
    frame = DataFrame()
    frame.read_tbl('data/example.csv')
    # add a column holding log10 of every X value
    frame['LOG10_X'] = list(map(math.log10, frame['X']))
    table = frame.pivot('LOG10_X', ['TIME'], ['CONDITION'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test0(self):
    """Pivot a derived LOG10_X column, convert to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ TIME CONDITION=A CONDITION=B ================================= day 1.864 1.933 night 1.622 1.731 """
    frame = DataFrame()
    frame.read_tbl('data/example.csv')
    # add a column holding log10 of every X value
    frame['LOG10_X'] = list(map(math.log10, frame['X']))
    table = frame.pivot('LOG10_X', ['TIME'], ['CONDITION'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test1(self):
    """The 'avg' pivot must render like the 'sum' pivot divided by the 'count' pivot.

    Both renderings are compared with their first line removed.
    """
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    sums = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='sum')
    counts = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='count')
    aves = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='avg')

    # counts are cast to float64 before dividing
    calc_aves = sums / counts.astype(np.float64)

    # Drop the first rendered line of each table before comparing
    # (presumably the title line, which differs for a computed table).
    aves_body = '\n'.join(str(aves).split('\n')[1:])
    calc_body = '\n'.join(str(calc_aves).split('\n')[1:])
    self.assertEqual(aves_body, calc_body)
def test1(self):
    """The 'avg' pivot must render like the 'sum' pivot divided by the 'count' pivot.

    Both renderings are compared with their first line removed.
    """
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    sums = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='sum')
    counts = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='count')
    aves = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='avg')

    # counts are cast to float64 before dividing
    calc_aves = sums / counts.astype(np.float64)

    # Drop the first rendered line of each table before comparing
    # (presumably the title line, which differs for a computed table).
    aves_body = '\n'.join(str(aves).split('\n')[1:])
    calc_body = '\n'.join(str(calc_aves).split('\n')[1:])
    self.assertEqual(aves_body, calc_body)
def test1(self):
    """Transpose a row-only pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY=T1, TIMEOFDAY=T1, TIMEOFDAY=T1, TIMEOFDAY=T2, TIMEOFDAY=T2, TIMEOFDAY=T2, Total COURSE=C1 COURSE=C2 COURSE=C3 COURSE=C1 COURSE=C2 COURSE=C3 ===================================================================================================== 7.167 6.500 4 3.222 2.889 1.556 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', rows=['TIMEOFDAY', 'COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test4(self):
    """Slice with [-1:] on the TIMEOFDAY x COURSE pivot and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T2 3.222 2.889 1.556 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    tail = table[-1:]
    self.assertEqual(expected, str(tail))
def test0(self):
    """np.sum over the TIMEOFDAY x COURSE pivot of ERROR matches a known total."""
    # Only np.sum is checked here, so no expected-table literal is defined.
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # compared to 5 decimal places
    self.assertAlmostEqual(np.sum(pt), 25.3333333333, 5)
def test2(self):
    """Add an ndarray of fives to the pivot table (__add__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 12.167 11.500 9 T2 8.222 7.889 6.556 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    offsets = np.array([[5, 5, 5],
                        [5, 5, 5]])
    shifted = table + offsets
    self.assertEqual(str(shifted), expected)
def test1(self):
    """Transpose a row-only pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY=T1, TIMEOFDAY=T1, TIMEOFDAY=T1, TIMEOFDAY=T2, TIMEOFDAY=T2, TIMEOFDAY=T2, Total COURSE=C1 COURSE=C2 COURSE=C3 COURSE=C1 COURSE=C2 COURSE=C3 ===================================================================================================== 7.167 6.500 4 3.222 2.889 1.556 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', rows=['TIMEOFDAY', 'COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test2(self):
    """Add an ndarray of fives to the pivot table (__add__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 12.167 11.500 9 T2 8.222 7.889 6.556 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    offsets = np.array([[5, 5, 5],
                        [5, 5, 5]])
    shifted = table + offsets
    self.assertEqual(str(shifted), expected)
def test0(self):
    """np.sum over the TIMEOFDAY x COURSE pivot of ERROR matches a known total."""
    # Only np.sum is checked here, so no expected-table literal is defined.
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # compared to 5 decimal places
    self.assertAlmostEqual(np.sum(pt), 25.3333333333, 5)
def test3(self):
    """ndenumerate() of a column-only pivot yields the expected (index, value) pairs."""
    values = [7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889, 1.55555555556]
    indices = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    for position, (inds, got) in enumerate(table.ndenumerate()):
        # values are compared to five decimals, indices by their str()
        self.assertEqual('%.5f' % got, '%.5f' % values[position])
        self.assertEqual(str(inds), str(indices[position]))
def test3(self):
    """Convert a row-only pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ GROUP AGE Value ====================== AA old 22.188 AA young 9.813 AB old 29.188 AB young 10.041 LAB old 34.141 LAB young 11.875 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION', rows=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test3(self):
    """Convert a row-only pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ GROUP AGE Value ====================== AA old 22.188 AA young 9.813 AB old 29.188 AB young 10.041 LAB old 34.141 LAB young 11.875 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION', rows=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test3(self):
    """Check shape and flattened values of a column-only pivot of ERROR."""
    expected = np.array([
        [7.16666667, 6.5, 4., 3.22222222, 2.88888889, 1.55555556],
    ])
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    # shapes are compared through their string form
    self.assertEqual(str(expected.shape), str(table.shape))
    for want, got in zip(expected.flatten(), table.flatten()):
        self.assertAlmostEqual(want, got)
def test0(self):
    """Transpose the TIMEOFDAY x COURSE pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 Total ============================================ C1 7.167 3.222 4.800 C2 6.500 2.889 4.333 C3 4 1.556 2.778 ============================================ Total 5.619 2.556 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test0(self):
    """Assign 0.0 to cell [1, 0] of the pivot table and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ===================================================== T1 7.167 6.500 4 5.619 T2 0 2.889 1.556 2.556 ===================================================== Total 4.800 4.333 2.778 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # overwrite a single cell in place
    table[1, 0] = 0.0
    self.assertEqual(expected, str(table))
def test2(self):
    """Multiply the pivot table by an ndarray of fives (__mul__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 35.833 32.500 20 T2 16.111 14.444 7.778 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    factors = np.array([[5, 5, 5],
                        [5, 5, 5]])
    scaled = table * factors
    self.assertEqual(str(scaled), expected)
def test2(self):
    """Multiply the pivot table by an ndarray of fives (__mul__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 35.833 32.500 20 T2 16.111 14.444 7.778 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    factors = np.array([[5, 5, 5],
                        [5, 5, 5]])
    scaled = table * factors
    self.assertEqual(str(scaled), expected)
def test0(self):
    """Assign 0. to cell [1, 0] of the pivot table and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ===================================================== T1 7.167 6.500 4 5.619 T2 0 2.889 1.556 2.556 ===================================================== Total 4.800 4.333 2.778 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # overwrite a single cell in place
    table[1, 0] = 0.0
    self.assertEqual(expected, str(table))
def test1(self):
    """Add the pivot table to itself (__add__ with a PyvtTbl) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ====================================================== T1 14.333 13 8 11.238 T2 6.444 5.778 3.111 5.111 ====================================================== Total 9.600 8.667 5.556 7.792 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    doubled = table + table
    self.assertEqual(str(doubled), expected)
def test1(self):
    """Iterating the pivot table yields items whose str() matches, in order."""
    # NOTE(review): literals kept verbatim; confirm their line breaks and padding.
    R = [
        """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 7.167 6.500 4 """,
        """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T2 3.222 2.889 1.556 """,
    ]
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    for r, L in zip(R, pt):
        # Both operands are strings, so assertEqual is the right check:
        # assertAlmostEqual would raise TypeError on a mismatch instead of
        # reporting an ordinary test failure.
        self.assertEqual(r, str(L))
def test3(self):
    """ndenumerate() of a column-only pivot yields the expected (index, value) pairs."""
    values = [7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889, 1.55555555556]
    indices = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    for position, (inds, got) in enumerate(table.ndenumerate()):
        # values are compared to five decimals, indices by their str()
        self.assertEqual('%.5f' % got, '%.5f' % values[position])
        self.assertEqual(str(inds), str(indices[position]))
def test1(self):
    """Add the pivot table to itself (__add__ with a PyvtTbl) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ====================================================== T1 14.333 13 8 11.238 T2 6.444 5.778 3.111 5.111 ====================================================== Total 9.600 8.667 5.556 7.792 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    doubled = table + table
    self.assertEqual(str(doubled), expected)
def test0(self):
    """Transpose the TIMEOFDAY x COURSE pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 Total ============================================ C1 7.167 3.222 4.800 C2 6.500 2.889 4.333 C3 4 1.556 2.778 ============================================ Total 5.619 2.556 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test0(self):
    """Multiply the pivot table by the constant 5 (__mul__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ====================================================== T1 35.833 32.500 20 28.095 T2 16.111 14.444 7.778 12.778 ====================================================== Total 24 21.667 13.889 19.479 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    scaled = table * 5
    self.assertEqual(str(scaled), expected)
def test2(self):
    """Zero and then mask the first row of the pivot table and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ===================================================== T1 -- -- -- 5.619 T2 3.222 2.889 1.556 2.556 ===================================================== Total 4.800 4.333 2.778 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # write zeros into row 0, then flag every cell of that row in the mask
    table[0, :] = [0] * 3
    table.mask[0, :] = [True] * 3
    self.assertEqual(expected, str(table))
def test1(self):
    """Iterating the pivot table yields items whose str() matches, in order."""
    # NOTE(review): literals kept verbatim; confirm their line breaks and padding.
    R = [
        """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T1 7.167 6.500 4 """,
        """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ============================================= T2 3.222 2.889 1.556 """,
    ]
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    for r, L in zip(R, pt):
        # Both operands are strings, so assertEqual is the right check:
        # assertAlmostEqual would raise TypeError on a mismatch instead of
        # reporting an ordinary test failure.
        self.assertEqual(r, str(L))
def test2(self):
    """Zero and then mask the first row of the pivot table and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ===================================================== T1 -- -- -- 5.619 T2 3.222 2.889 1.556 2.556 ===================================================== Total 4.800 4.333 2.778 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    # write zeros into row 0, then flag every cell of that row in the mask
    table[0, :] = [0] * 3
    table.mask[0, :] = [True] * 3
    self.assertEqual(expected, str(table))
def test4(self):
    """Iterate a 'tolist' pivot table and check the str() of each yielded item."""
    # NOTE(review): literals kept verbatim; confirm their line breaks and padding.
    first = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ================================================================================================================================================================= T1 [10.0, 8.0, 6.0, 8.0, 7.0, 4.0, None, None, None] [9.0, 10.0, 6.0, 4.0, 7.0, 3.0, None, None, None] [7.0, 6.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 2.0] """
    second = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ========================================================================================================================================================= T2 [5.0, 4.0, 3.0, 4.0, 3.0, 3.0, 4.0, 1.0, 2.0] [4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 3.0, 3.0, 2.0] [2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0] """
    expected = [first, second]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='tolist')
    for want, item in zip(expected, table):
        self.assertEqual(want, str(item))
def test0(self):
    """Multiply the pivot table by the constant 5 (__mul__) and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ N/A(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 Total ====================================================== T1 35.833 32.500 20 28.095 T2 16.111 14.444 7.778 12.778 ====================================================== Total 24 21.667 13.889 19.479 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    scaled = table * 5
    self.assertEqual(str(scaled), expected)
def test2(self):
    """Transpose a column-only pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE Value ========================== T1 C1 7.167 T1 C2 6.500 T1 C3 4 T2 C1 3.222 T2 C2 2.889 T2 C3 1.556 ========================== Total 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test2(self):
    """Transpose a column-only pivot of ERROR and check its str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ avg(ERROR) TIMEOFDAY COURSE Value ========================== T1 C1 7.167 T1 C2 6.500 T1 C3 4 T2 C1 3.222 T2 C2 2.889 T2 C3 1.556 ========================== Total 3.896 """
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
    flipped = table.transpose()
    self.assertEqual(str(flipped), expected)
def test4(self):
    """Iterate a 'tolist' pivot table and check the str() of each yielded item."""
    # NOTE(review): literals kept verbatim; confirm their line breaks and padding.
    first = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ================================================================================================================================================================= T1 [10.0, 8.0, 6.0, 8.0, 7.0, 4.0, None, None, None] [9.0, 10.0, 6.0, 4.0, 7.0, 3.0, None, None, None] [7.0, 6.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 2.0] """
    second = """\ avg(ERROR) TIMEOFDAY COURSE=C1 COURSE=C2 COURSE=C3 ========================================================================================================================================================= T2 [5.0, 4.0, 3.0, 4.0, 3.0, 3.0, 4.0, 1.0, 2.0] [4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 3.0, 3.0, 2.0] [2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0] """
    expected = [first, second]
    frame = DataFrame()
    frame.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    table = frame.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='tolist')
    for want, item in zip(expected, table):
        self.assertEqual(want, str(item))
def test1(self):
    """Convert a CYCLE/PHASE x GROUP/AGE pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ CYCLE PHASE GROUP=AA, GROUP=AA, GROUP=AB, GROUP=AB, GROUP=LAB, GROUP=LAB, AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ======================================================================================= 1 I 17.750 8.675 12.625 5.525 21.625 7.825 1 II 20.875 8.300 22.750 8.675 36.250 13.750 2 I 22.375 10.225 23.500 8.825 21.375 9.900 2 II 28.125 10.250 41.125 13.100 46.875 14.375 3 I 23.125 10.500 20.000 9.125 23.750 9.500 3 II 20.750 9.525 46.125 14.475 50.375 15.575 4 I 20.250 9.925 15.625 7.750 26.375 9.650 4 II 24.250 11.100 51.750 12.850 46.500 14.425 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION',
                        rows=['CYCLE', 'PHASE'],
                        cols=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test1(self):
    """Convert a CYCLE/PHASE x GROUP/AGE pivot of SUPPRESSION to a DataFrame and check str()."""
    # NOTE(review): literal kept verbatim; confirm its line breaks and padding.
    expected = """\ CYCLE PHASE GROUP=AA, GROUP=AA, GROUP=AB, GROUP=AB, GROUP=LAB, GROUP=LAB, AGE=old AGE=young AGE=old AGE=young AGE=old AGE=young ======================================================================================= 1 I 17.750 8.675 12.625 5.525 21.625 7.825 1 II 20.875 8.300 22.750 8.675 36.250 13.750 2 I 22.375 10.225 23.500 8.825 21.375 9.900 2 II 28.125 10.250 41.125 13.100 46.875 14.375 3 I 23.125 10.500 20.000 9.125 23.750 9.500 3 II 20.750 9.525 46.125 14.475 50.375 15.575 4 I 20.250 9.925 15.625 7.750 26.375 9.650 4 II 24.250 11.100 51.750 12.850 46.500 14.425 """
    frame = DataFrame()
    frame.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
    table = frame.pivot('SUPPRESSION',
                        rows=['CYCLE', 'PHASE'],
                        cols=['GROUP', 'AGE'])
    converted = table.to_dataframe()
    self.assertEqual(str(converted), expected)
def test5(self):
    """Iterating the transposed pivot table yields items whose str() matches, in order."""
    # NOTE(review): literals kept verbatim; confirm their line breaks and padding.
    R = [
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C1 7.167 3.222 """,
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C2 6.500 2.889 """,
        """\ avg(ERROR) COURSE TIMEOFDAY=T1 TIMEOFDAY=T2 ==================================== C3 4 1.556 """,
    ]
    df = DataFrame()
    df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
    pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
    for r, L in zip(R, pt.transpose()):
        # Both operands are strings, so assertEqual is the right check:
        # assertAlmostEqual would raise TypeError on a mismatch instead of
        # reporting an ordinary test failure.
        self.assertEqual(r, str(L))
class Test_pivot_1(unittest.TestCase): def setUp(self): D = { 'SUBJECT': [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100 ], 'AGE': 'old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,old,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young,young' .split(','), 'CONDITION': 'counting,counting,counting,counting,counting,counting,counting,counting,counting,counting,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,intention,intention,intention,intention,intention,intention,intention,intention,intention,intention,counting,counting,counting,counting,counting,counting,counting,counting,counting,counting,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,rhyming,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,adjective,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,imagery,intention,intention,intention,intention,intention,intention,intention,intention,intention,intention' .split(','), 'WORDS': [ 9, 8, 6, 8, 10, 4, 6, 5, 7, 7, 7, 9, 6, 6, 6, 11, 6, 3, 8, 7, 11, 13, 
8, 6, 14, 11, 13, 13, 10, 11, 12, 11, 16, 11, 9, 23, 12, 10, 19, 11, 10, 19, 14, 5, 10, 11, 14, 15, 11, 11, 8, 6, 4, 6, 7, 6, 5, 7, 9, 7, 10, 7, 8, 10, 4, 7, 10, 6, 7, 7, 14, 11, 18, 14, 13, 22, 17, 16, 12, 11, 20, 16, 16, 15, 18, 16, 20, 22, 14, 19, 21, 19, 17, 15, 22, 16, 22, 22, 18, 21 ], } self.df = DataFrame() self.df.read_tbl('data/words~ageXcondition.csv') def test0(self): R = """\ avg(WORDS) AGE CONDITION=adjective CONDITION=counting CONDITION=imagery CONDITION=intention CONDITION=rhyming Total ======================================================================================================================= old 11 7 13.400 12 6.900 10.060 young 14.800 6.500 17.600 19.300 7.600 13.160 ======================================================================================================================= Total 12.900 6.750 15.500 15.650 7.250 11.610 """ D = self.df.pivot('WORDS', rows=['AGE'], cols=['CONDITION']) ## print(repr(D)) # verify the values in the table self.failUnlessEqual(str(D), R) def test2(self): R = """\ avg(WORDS) CONDITION=adjective CONDITION=counting CONDITION=imagery CONDITION=intention CONDITION=rhyming Total =============================================================================================================== 12.900 6.750 15.500 15.650 7.250 11.610 """ D = self.df.pivot('WORDS', cols=['CONDITION']) # verify the values in the table self.failUnlessEqual(str(D), R) def test3(self): R = """\ avg(WORDS) CONDITION Value ================== adjective 12.900 counting 6.750 imagery 15.500 intention 15.650 rhyming 7.250 ================== Total 11.610 """ D = self.df.pivot('WORDS', rows=['CONDITION']) # verify the values in the table self.failUnlessEqual(str(D), R) def test4(self): R = """\ stdev(WORDS) Value ===== 5.191 """ # No rows or cols D = self.df.pivot('WORDS', aggregate='stdev') # verify the values in the table self.failUnlessEqual(str(D), R) def test6(self): # tolist handles text data differently then integer # or float data. 
We need to test this case as well R = """\ tolist(ABC) AGE CONDITION=adjective CONDITION=counting CONDITION=imagery CONDITION=intention CONDITION=rhyming ================================================================================================================================================================================================================================================================================================================================ old [u'L', u'N', u'I', u'G', u'O', u'L', u'N', u'N', u'K', u'L'] [u'J', u'I', u'G', u'I', u'K', u'E', u'G', u'F', u'H', u'H'] [u'M', u'L', u'Q', u'L', u'J', u'X', u'M', u'K', u'T', u'L'] [u'K', u'T', u'O', u'F', u'K', u'L', u'O', u'P', u'L', u'L'] [u'H', u'J', u'G', u'G', u'G', u'L', u'G', u'D', u'I', u'H'] young [u'O', u'L', u'S', u'O', u'N', u'W', u'R', u'Q', u'M', u'L'] [u'I', u'G', u'E', u'G', u'H', u'G', u'F', u'H', u'J', u'H'] [u'U', u'Q', u'Q', u'P', u'S', u'Q', u'U', u'W', u'O', u'T'] [u'V', u'T', u'R', u'P', u'W', u'Q', u'W', u'W', u'S', u'V'] [u'K', u'H', u'I', u'K', u'E', u'H', u'K', u'G', u'H', u'H'] """ # caesar cipher num2abc = dict(zip(list(range(26)), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')) self.df['ABC'] = [num2abc[v % 26] for v in self.df['WORDS']] D = self.df.pivot('ABC', rows=['AGE'], cols=['CONDITION'], aggregate='tolist') # verify the values in the table self.failUnlessEqual(str(D), R) def test7(self): # test group_concat R = """\ group_concat(WORDS) AGE CONDITION=adjective CONDITION=counting CONDITION=imagery CONDITION=intention CONDITION=rhyming ===================================================================================================================================================== old 11,13,8,6,14,11,13,13,10,11 9,8,6,8,10,4,6,5,7,7 12,11,16,11,9,23,12,10,19,11 10,19,14,5,10,11,14,15,11,11 7,9,6,6,6,11,6,3,8,7 young 14,11,18,14,13,22,17,16,12,11 8,6,4,6,7,6,5,7,9,7 20,16,16,15,18,16,20,22,14,19 21,19,17,15,22,16,22,22,18,21 10,7,8,10,4,7,10,6,7,7 """ D = 
self.df.pivot('WORDS', rows=['AGE'], cols=['CONDITION'], aggregate='group_concat') # verify the values in the table self.failUnlessEqual(str(D), R) def test8(self): # tolist handles text data differently then integer # or float data. We need to test this case as well R = """\ tolist(ABC) CONDITION=adjective CONDITION=counting CONDITION=imagery CONDITION=intention CONDITION=rhyming ==================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================== [u'L', u'N', u'I', u'G', u'O', u'L', u'N', u'N', u'K', u'L', u'O', u'L', u'S', u'O', u'N', u'W', u'R', u'Q', u'M', u'L'] [u'J', u'I', u'G', u'I', u'K', u'E', u'G', u'F', u'H', u'H', u'I', u'G', u'E', u'G', u'H', u'G', u'F', u'H', u'J', u'H'] [u'M', u'L', u'Q', u'L', u'J', u'X', u'M', u'K', u'T', u'L', u'U', u'Q', u'Q', u'P', u'S', u'Q', u'U', u'W', u'O', u'T'] [u'K', u'T', u'O', u'F', u'K', u'L', u'O', u'P', u'L', u'L', u'V', u'T', u'R', u'P', u'W', u'Q', u'W', u'W', u'S', u'V'] [u'H', u'J', u'G', u'G', u'G', u'L', u'G', u'D', u'I', u'H', u'K', u'H', u'I', u'K', u'E', u'H', u'K', u'G', u'H', u'H'] """ # caesar cipher num2abc = dict(zip(list(range(26)), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')) self.df['ABC'] = [num2abc[v % 26] for v in self.df['WORDS']] D = self.df.pivot('ABC', cols=['CONDITION'], aggregate='tolist') # verify the values in the table self.failUnlessEqual(str(D), R) def test9(self): # tolist handles text data differently then integer # or float data. 
We need to test this case as well R = """\ tolist(ABC) CONDITION Value ==================================================================================================================================== adjective [u'L', u'N', u'I', u'G', u'O', u'L', u'N', u'N', u'K', u'L', u'O', u'L', u'S', u'O', u'N', u'W', u'R', u'Q', u'M', u'L'] counting [u'J', u'I', u'G', u'I', u'K', u'E', u'G', u'F', u'H', u'H', u'I', u'G', u'E', u'G', u'H', u'G', u'F', u'H', u'J', u'H'] imagery [u'M', u'L', u'Q', u'L', u'J', u'X', u'M', u'K', u'T', u'L', u'U', u'Q', u'Q', u'P', u'S', u'Q', u'U', u'W', u'O', u'T'] intention [u'K', u'T', u'O', u'F', u'K', u'L', u'O', u'P', u'L', u'L', u'V', u'T', u'R', u'P', u'W', u'Q', u'W', u'W', u'S', u'V'] rhyming [u'H', u'J', u'G', u'G', u'G', u'L', u'G', u'D', u'I', u'H', u'K', u'H', u'I', u'K', u'E', u'H', u'K', u'G', u'H', u'H'] """ # caesar cipher num2abc = dict(zip(list(range(26)), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')) self.df['ABC'] = [num2abc[v % 26] for v in self.df['WORDS']] D = self.df.pivot('ABC', rows=['CONDITION'], aggregate='tolist') # verify the values in the table self.failUnlessEqual(str(D), R) def test10(self): # test group_concat R = """\ group_concat(WORDS) AGE=old AGE=young ====================================================================================================================================================================================================================================================================== 9,8,6,8,10,4,6,5,7,7,7,9,6,6,6,11,6,3,8,7,11,13,8,6,14,11,13,13,10,11,12,11,16,11,9,23,12,10,19,11,10,19,14,5,10,11,14,15,11,11 8,6,4,6,7,6,5,7,9,7,10,7,8,10,4,7,10,6,7,7,14,11,18,14,13,22,17,16,12,11,20,16,16,15,18,16,20,22,14,19,21,19,17,15,22,16,22,22,18,21 """ D = self.df.pivot('WORDS', cols=['AGE'], aggregate='group_concat') # verify the values in the table self.failUnlessEqual(str(D), R) def test11(self): # test group_concat R = """\ group_concat(WORDS) AGE Value 
============================================================================================================================================ old 9,8,6,8,10,4,6,5,7,7,7,9,6,6,6,11,6,3,8,7,11,13,8,6,14,11,13,13,10,11,12,11,16,11,9,23,12,10,19,11,10,19,14,5,10,11,14,15,11,11 young 8,6,4,6,7,6,5,7,9,7,10,7,8,10,4,7,10,6,7,7,14,11,18,14,13,22,17,16,12,11,20,16,16,15,18,16,20,22,14,19,21,19,17,15,22,16,22,22,18,21 """ D = self.df.pivot('WORDS', rows=['AGE'], aggregate='group_concat') # verify the values in the table self.failUnlessEqual(str(D), R)