Beispiel #1
0
def long2wide(in_fname,
              id,
              dvs,
              between=[],
              within=[],
              covariates=[],
              out_fname=None,
              nested=True):

    # load in_fname into a PyvtTbl object
    print('reading "%s"...' % in_fname)
    cls = DataFrame()
    cls.read_tbl(in_fname)

    # loop through DVs and append within columns
    d = [sorted(set(cls[id]))]
    header = [id] + covariates + between

    for col in covariates + between:
        z = cls.pivot(col, cols=[id], aggregate='arbitrary')
        d.extend(list(z))

    # start controls whether nested factors are examined
    if nested: start = 1
    else: start = len(within)

    for i, dv in enumerate(dvs):
        print('\ncollaborating %s' % dv)
        for j in _xrange(start, len(within) + 1):

            for factors in _xunique_combinations(within, j):
                print('  pivoting', factors, '...')
                z = cls.pivot(dv, rows=factors, cols=[id], aggregate='avg')
                d.extend(list(z))

                # process headers
                for names in z.rnames:
                    h = '_'.join(('%s.%s' % (f, str(c)) for (f, c) in names))
                    header.append('%s__%s' % (dv, h))

    # Now we can write the data
    if out_fname == None:
        out_fname = 'wide_data.csv'

    with open(out_fname, 'wb') as f:
        wtr = csv.writer(f)
        wtr.writerow([n.upper() for n in header])
        wtr.writerows(zip(*d))  # transpose and write
Beispiel #2
0
    def test0(self):

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        self.assertAlmostEqual(3.22222222222, pt[1, 0], 5)
Beispiel #3
0
    def test1(self):

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY', 'MODEL'], ['COURSE'])

        self.assertEqual(repr(eval(repr(pt))), repr(pt))
Beispiel #4
0
    def test3(self):
        """method='full', aggregate=tolist, invalid row"""
        
        R = """\
tolist(id)
Name    Year     member=N       member=Y   
==========================================
name1   2010   [None, None]     [0.0, 0.0] 
name1   2011     [1.0, 1.0]   [None, None] 
name2   2010   [None, None]   [None, None] 
name2   2011   [None, None]     [2.0, 2.0] """
        
        df = DataFrame()
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':1})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':1})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':1})
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':2})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':2})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':2})
        
        my_pivot = df.pivot('id',rows = ['Name','Year'], cols = ['member'],
                            aggregate='tolist', method='full')
        

##        print(my_pivot)

        self.assertEqual(R,str(my_pivot))
Beispiel #5
0
    def test4(self):
        """method='full', aggregate=tolist, invalid col"""
        
        R = """\
tolist(id)
member   Name=name1,    Name=name1,    Name=name2,    Name=name2,  
          Year=2010      Year=2011      Year=2010      Year=2011   
==================================================================
N        [None, None]     [1.0, 1.0]   [None, None]   [None, None] 
Y          [0.0, 0.0]   [None, None]   [None, None]     [2.0, 2.0] """
        
        df = DataFrame()
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':1})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':1})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':1})
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y','rep':2})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N','rep':2})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y','rep':2})
        
        my_pivot = df.pivot('id',rows = ['member'], cols = ['Name','Year'],
                            aggregate='tolist', method='full')

##        print(my_pivot)

        self.assertEqual(R,str(my_pivot))
Beispiel #6
0
    def test2(self):
        R = """\
GROUP=AA,   GROUP=AA,   GROUP=AB,   GROUP=AB,   GROUP=LAB,   GROUP=LAB, 
 AGE=old    AGE=young    AGE=old    AGE=young    AGE=old     AGE=young  
=======================================================================
   22.188       9.813      29.188      10.041       34.141       11.875 """
        df = DataFrame()
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        pt = df.pivot('SUPPRESSION',
                  cols=['GROUP', 'AGE'])
        df2 = pt.to_dataframe()
       
        self.assertEqual(str(df2),R)
Beispiel #7
0
    def test3(self):

        R = np.array(
            [[7.16666667, 6.5, 4., 3.22222222, 2.88888889, 1.55555556]])

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])

        self.assertEqual(str(R.shape), str(pt.shape))

        for r, L in zip(R.flatten(), pt.flatten()):
            self.assertAlmostEqual(r, L)
Beispiel #8
0
    def test2(self):
        R = [
            7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889,
            1.55555555556
        ]

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])
        pt_flat = pt.flatten()

        for r, d in zip(R, pt_flat):
            self.assertAlmostEqual(r, d)
Beispiel #9
0
    def test4(self):

        R = """\
avg(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3 
=============================================
T2              3.222       2.889       1.556 """

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        self.assertEqual(R, str(pt[-1:]))
Beispiel #10
0
    def test2(self):

        R = [
            7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889,
            1.55555555556
        ]

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY', 'COURSE'])

        for r, L in zip(R, pt.flat):
            self.assertEqual('%.5f' % r, '%.5f' % L)
Beispiel #11
0
    def test5(self):

        R = """\
avg(ERROR)
TIMEOFDAY   COURSE=C1 
=====================
T1              7.167 
T2              3.222 """

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])
        self.assertEqual(R, str(pt[:, 0]))
Beispiel #12
0
    def test1(self):
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total  
======================================================
T1             51.361      42.250          16   31.574 
T2             10.383       8.346       2.420    6.531 
======================================================
Total          23.040      18.778       7.716   15.178 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        sums = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'],aggregate='sum')
        counts = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'],aggregate='count')
        aves = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'],aggregate='avg')
        calc_aves = sums/counts.astype(np.float64)
        
##        print('\n'.join(str(aves).split('\n')[1:]))
##        print('\n'.join(str(calc_aves).split('\n')[1:]))
        
        self.assertEqual('\n'.join(str(aves).split('\n')[1:]),
                         '\n'.join(str(calc_aves).split('\n')[1:]))
Beispiel #13
0
    def test0(self):
        R = """\
TIME    CONDITION=A   CONDITION=B 
=================================
day           1.864         1.933 
night         1.622         1.731 """
        
        df = DataFrame()
        df.read_tbl('data/example.csv')
        df['LOG10_X']=[math.log10(x) for x in df['X']]
        pt = df.pivot('LOG10_X', ['TIME'], ['CONDITION'])
        df2 = pt.to_dataframe()
        
        self.assertEqual(str(df2),R)
Beispiel #14
0
    def test2(self):
        # __add__ ndarray
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3 
=============================================
T1             12.167      11.500           9 
T2              8.222       7.889       6.556 """
                
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])
        pt2=pt+np.array([[5,5,5], [5,5,5]])
        
        self.assertEqual(str(pt2),R)
Beispiel #15
0
    def test1(self):
        R ="""\
avg(ERROR)
TIMEOFDAY=T1,   TIMEOFDAY=T1,   TIMEOFDAY=T1,   TIMEOFDAY=T2,   TIMEOFDAY=T2,   TIMEOFDAY=T2,   Total 
  COURSE=C1       COURSE=C2       COURSE=C3       COURSE=C1       COURSE=C2       COURSE=C3           
=====================================================================================================
        7.167           6.500               4           3.222           2.889           1.556   3.896 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', rows=['TIMEOFDAY','COURSE'])        
        pt2 = pt.transpose()

        self.assertEqual(str(pt2),R)
Beispiel #16
0
    def test0(self):
        # __add__ constant
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total  
======================================================
T1             12.167      11.500           9   10.619 
T2              8.222       7.889       6.556    7.556 
======================================================
Total           9.800       9.333       7.778    8.896 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])
        self.assertAlmostEqual(np.sum(pt),25.3333333333, 5)
Beispiel #17
0
    def test2(self):
        # __mul__ ndarray
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3 
=============================================
T1             35.833      32.500          20 
T2             16.111      14.444       7.778 """
                
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])
        pt2=pt*np.array([[5,5,5], [5,5,5]])

##        print(pt2)
        
        self.assertEqual(str(pt2),R)
Beispiel #18
0
    def test0(self):
        R = """\
avg(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total 
=====================================================
T1              7.167       6.500           4   5.619 
T2                  0       2.889       1.556   2.556 
=====================================================
Total           4.800       4.333       2.778   3.896 """

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        pt[1, 0] = 0.

        self.assertEqual(R, str(pt))
Beispiel #19
0
    def test3(self):
        R = """\
GROUP    AGE    Value  
======================
AA      old     22.188 
AA      young    9.813 
AB      old     29.188 
AB      young   10.041 
LAB     old     34.141 
LAB     young   11.875 """
        df = DataFrame()
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        pt = df.pivot('SUPPRESSION',
                  rows=['GROUP', 'AGE'])
        df2 = pt.to_dataframe()

        self.assertEqual(str(df2),R)
Beispiel #20
0
    def test3(self):

        R = [
            7.16666666667, 6.5, 4.0, 3.22222222222, 2.88888888889,
            1.55555555556
        ]

        Rinds = [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', cols=['TIMEOFDAY', 'COURSE'])

        i = 0
        for inds, L in pt.ndenumerate():
            self.assertEqual('%.5f' % L, '%.5f' % R[i])
            self.assertEqual(str(inds), str(Rinds[i]))
            i += 1
Beispiel #21
0
    def test0(self):
        R ="""\
avg(ERROR)
COURSE   TIMEOFDAY=T1   TIMEOFDAY=T2   Total 
============================================
C1              7.167          3.222   4.800 
C2              6.500          2.889   4.333 
C3                  4          1.556   2.778 
============================================
Total           5.619          2.556   3.896 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])        
        pt2 = pt.transpose()

        self.assertEqual(str(pt2),R)
Beispiel #22
0
    def test1(self):
        # __add__ PyvtTbl
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total  
======================================================
T1             14.333          13           8   11.238 
T2              6.444       5.778       3.111    5.111 
======================================================
Total           9.600       8.667       5.556    7.792 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])
        pt2=pt+pt
        
        self.assertEqual(str(pt2),R)
Beispiel #23
0
    def test1(self):

        R = [
            """\
avg(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3 
=============================================
T1              7.167       6.500           4 """, """\
avg(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3 
=============================================
T2              3.222       2.889       1.556 """
        ]
        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        for r, L in zip(R, pt):
            self.assertAlmostEqual(r, str(L))
Beispiel #24
0
    def test2(self):

        R = """\
avg(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total 
=====================================================
T1                 --          --          --   5.619 
T2              3.222       2.889       1.556   2.556 
=====================================================
Total           4.800       4.333       2.778   3.896 """

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'])

        pt[0, :] = [0, 0, 0]
        pt.mask[0, :] = [True, True, True]

        self.assertEqual(R, str(pt))
Beispiel #25
0
    def test2(self):
        R ="""\
avg(ERROR)
TIMEOFDAY   COURSE   Value 
==========================
T1          C1       7.167 
T1          C2       6.500 
T1          C3           4 
T2          C1       3.222 
T2          C2       2.889 
T2          C3       1.556 
==========================
Total                3.896 """        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', cols=['TIMEOFDAY','COURSE'])        
        pt2 = pt.transpose()

        self.assertEqual(str(pt2),R)
Beispiel #26
0
    def test0(self):
        # __mul__ constant
        R ="""\
N/A(ERROR)
TIMEOFDAY   COURSE=C1   COURSE=C2   COURSE=C3   Total  
======================================================
T1             35.833      32.500          20   28.095 
T2             16.111      14.444       7.778   12.778 
======================================================
Total              24      21.667      13.889   19.479 """
        
        
        df=DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'],['COURSE'])
        pt2=pt*5

##        print(pt2)
        
        self.assertEqual(str(pt2),R)
Beispiel #27
0
    def test4(self):

        R = [
            """\
avg(ERROR)
TIMEOFDAY                       COURSE=C1                                           COURSE=C2                                         COURSE=C3                   
=================================================================================================================================================================
T1          [10.0, 8.0, 6.0, 8.0, 7.0, 4.0, None, None, None]   [9.0, 10.0, 6.0, 4.0, 7.0, 3.0, None, None, None]   [7.0, 6.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 2.0] """,
            """\
avg(ERROR)
TIMEOFDAY                     COURSE=C1                                       COURSE=C2                                       COURSE=C3                   
=========================================================================================================================================================
T2          [5.0, 4.0, 3.0, 4.0, 3.0, 3.0, 4.0, 1.0, 2.0]   [4.0, 3.0, 3.0, 4.0, 2.0, 2.0, 3.0, 3.0, 2.0]   [2.0, 2.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0] """
        ]

        df = DataFrame()
        df.read_tbl('data/error~subjectXtimeofdayXcourseXmodel_MISSING.csv')
        pt = df.pivot('ERROR', ['TIMEOFDAY'], ['COURSE'], aggregate='tolist')

        for r, L in zip(R, pt):
            self.assertEqual(r, str(L))
Beispiel #28
0
    def test1(self):
        R = """\
CYCLE   PHASE   GROUP=AA,   GROUP=AA,   GROUP=AB,   GROUP=AB,   GROUP=LAB,   GROUP=LAB, 
                 AGE=old    AGE=young    AGE=old    AGE=young    AGE=old     AGE=young  
=======================================================================================
    1   I          17.750       8.675      12.625       5.525       21.625        7.825 
    1   II         20.875       8.300      22.750       8.675       36.250       13.750 
    2   I          22.375      10.225      23.500       8.825       21.375        9.900 
    2   II         28.125      10.250      41.125      13.100       46.875       14.375 
    3   I          23.125      10.500      20.000       9.125       23.750        9.500 
    3   II         20.750       9.525      46.125      14.475       50.375       15.575 
    4   I          20.250       9.925      15.625       7.750       26.375        9.650 
    4   II         24.250      11.100      51.750      12.850       46.500       14.425 """
        df = DataFrame()
        df.read_tbl('data/suppression~subjectXgroupXageXcycleXphase.csv')
        pt = df.pivot('SUPPRESSION',
                  rows=['CYCLE', 'PHASE'],
                  cols=['GROUP', 'AGE'])
        df2 = pt.to_dataframe()

        self.assertEqual(str(df2),R)
Beispiel #29
0
    def test1(self):
        """method='valid', aggregate=count, invalid row"""
        
        R = """\
count(id)
Name    Year   member=N   member=Y   Total 
==========================================
name1   2010          0          1       1 
name1   2011          1          0       1 
name2   2011          0          1       1 
==========================================
Total                 1          2       3 """
        
        df = DataFrame()
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y'})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N'})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y'})
        
        my_pivot = df.pivot('id',rows = ['Name','Year'], cols = ['member'], aggregate='count')

        self.assertEqual(R,str(my_pivot))
Beispiel #30
0
    def test2(self):
        """method='valid', aggregate=count, invalid col"""
        
        R = """\
count(id)
member   Name=name1,   Name=name1,   Name=name2,   Total 
          Year=2010     Year=2011     Year=2011          
========================================================
N                  0             1             0       1 
Y                  1             0             1       2 
========================================================
Total              1             1             1       3 """
        
        df = DataFrame()
        df.insert({'id':0,'Name':'name1','Year':2010,'member':'Y'})
        df.insert({'id':1,'Name':'name1','Year':2011,'member':'N'})
        df.insert({'id':2,'Name':'name2','Year':2011,'member':'Y'})
        
        my_pivot = df.pivot('id',rows = ['member'], cols = ['Name','Year'], aggregate='count')
        
        self.assertEqual(R,str(my_pivot))