Example #1
0
def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    The file is read twice: once as raw bytes to locate the end of the
    leading comment block (a line containing ``*/``) and the header line
    matching ``<YEAR FREQ START>`` (e.g. ``<1960 Q1>``), and once with
    ``np.genfromtxt`` to load the columns that follow that header.

    Parameters
    ----------
    path : str
        Location of the data file.

    Returns
    -------
    data : ndarray
        Structured array produced by ``np.genfromtxt(..., names=True)``.
    date_range : DatetimeIndex or DateRange
        ``len(data)`` dates at the frequency given in the header, starting
        from the header's year and start point.

    Raises
    ------
    IndexError
        If the file ends before the ``*/`` line or the header line is found.
    KeyError
        If the frequency code in the header is not one of Q, M or A.
    """

    from collections import deque
    from datetime import datetime
    import pandas.core.datetools as dt
    import re
    from statsmodels.compatnp.py3k import asbytes

    # raw string so that \w and \d reach the regex engine untouched
    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))

    # read everything up front and close the handle right away
    with open(path, 'rb') as fh:
        lines = deque(fh)

    # count the lines that precede the end-of-comment marker
    comment_end = asbytes('*/')
    to_skip = 0
    while comment_end not in lines.popleft():
        to_skip += 1

    # keep counting until the <YEAR FREQ START> header line shows up
    while True:
        to_skip += 1
        m = regex.match(lines.popleft())
        if m:
            year, freq, start_point = m.groups()
            break

    # +1 accounts for the '*/' line, which was consumed but not counted above
    data = np.genfromtxt(path, names=True, skip_header=to_skip+1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q') : dt.BQuarterEnd(),
        asbytes('M') : dt.BMonthEnd(),
        asbytes('A') : dt.BYearEnd()
    }

    # create an instance
    offset = offsets[freq]

    # first period end on/after Jan 1 of `year`, advanced by start_point - 1
    # periods
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    try:
        from pandas import DatetimeIndex   # pylint: disable=E0611
        date_range = DatetimeIndex(start=start_date, freq=offset, periods=n)
    except ImportError:
        # fall back to the DateRange class when DatetimeIndex is unavailable
        from pandas import DateRange
        date_range = DateRange(start_date, offset=offset, periods=n)

    return data, date_range
Example #2
0
 def _null_terminate(self, s, encoding):
     """
     Cut `s` at its null terminator and, on Python 3, decode it.

     Leading null bytes are stripped and the result is sliced up to the
     position of the first null byte found in `s`.  If `s` contains no null
     byte it is left unchanged.  Under PY3 the value is then decoded with
     `encoding`; otherwise it is returned as is.
     """
     null_byte = asbytes('\x00')
     try:
         # NOTE(review): the slice index is taken from the unstripped `s`,
         # so a value that starts with a null byte comes out empty --
         # confirm this is intended.
         s = s.lstrip(null_byte)[:s.index(null_byte)]
     except ValueError:
         # index() found no null byte: keep `s` as it is
         pass
     if PY3:  # have bytes not strings, so must decode
         return s.decode(encoding)
     return s
Example #3
0
 def _null_terminate(self, s, encoding):
     """
     Truncate `s` at its null terminator; decode the result on Python 3.

     Null bytes at the start of `s` are stripped, then the value is sliced
     up to the index of the first null byte in `s`.  When `s` has no null
     byte it is kept unchanged.  With PY3 set the value is decoded using
     `encoding` before being returned; otherwise it is returned undecoded.
     """
     null_byte = asbytes('\x00')
     try:
         # NOTE(review): index() runs on the original `s`, not the stripped
         # one, so input beginning with a null byte yields an empty value --
         # confirm this is intended.
         s = s.lstrip(null_byte)[:s.index(null_byte)]
     except ValueError:
         # no null byte present: nothing to cut off
         pass
     if PY3: # have bytes not strings, so must decode
         return s.decode(encoding)
     return s
Example #4
0
 def _null_terminate(self, s, encoding):
     """
     Return `s` with a terminating null character appended.

     Under PY3 the terminated string is also encoded with `encoding`;
     otherwise `s` plus the terminator is returned without encoding.
     """
     if PY3:
         # Append the terminator as text and encode afterwards.  The previous
         # code added asbytes('\x00') to `s` and then called .encode(), which
         # cannot succeed if asbytes yields bytes on Python 3 (presumed from
         # its use elsewhere -- confirm): str + bytes raises TypeError.
         return (s + '\x00').encode(encoding)
     return s + asbytes('\x00')
Example #5
0
 def _null_terminate(self, s, encoding):
     """
     Append a null terminator to `s`.

     With PY3 set, the terminated text is encoded using `encoding` and the
     encoded value is returned; otherwise the terminated `s` is returned
     unencoded.
     """
     if PY3:
         # Terminate first, then encode.  Adding asbytes('\x00') to a text
         # `s` and calling .encode() afterwards (the earlier form) fails with
         # TypeError when asbytes returns bytes on Python 3 -- presumed,
         # confirm against the helper.
         return (s + '\x00').encode(encoding)
     return s + asbytes('\x00')
Example #6
0
    def setup_class(self):
        """
        Configure the SAS comparison case.

        Takes the data from ``dta3``, runs the shared ``setup_class_`` hook
        and then stores the expected mean differences, confidence bounds and
        rejection flags read from ``sas_``.
        """
        # SAS case -- set before setup_class_(), which presumably reads them
        self.endog, self.groups = dta3['Relief'], dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()

        # reference values from the SAS table
        self.meandiff2 = sas_['mean']
        bounds = sas_[['lower', 'upper']]
        self.confint2 = bounds.view(float).reshape((3, 2))
        significant = asbytes('***')
        self.reject2 = sas_['sig'] == significant
Example #7
0
    def setup_class(self):
        """
        Prepare the SAS reference case.

        Loads endog/groups from ``dta3``, calls ``setup_class_`` and records
        the expected results (mean differences, lower/upper bounds reshaped
        to 3x2, and significance flags) taken from ``sas_``.
        """
        # SAS case -- assigned ahead of setup_class_(), which presumably
        # consumes them
        self.endog, self.groups = dta3['Relief'], dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()

        # expected results as listed in the SAS table
        self.meandiff2 = sas_['mean']
        limits = sas_[['lower','upper']]
        self.confint2 = limits.view(float).reshape((3,2))
        flagged = asbytes('***')
        self.reject2 = sas_['sig'] == flagged
Example #8
0
def test_missing_roundtrip():
    """
    Check missing-value handling of the Stata writer and reader.

    First writes a single record holding nan, inf and an empty string to an
    in-memory buffer and reads it back; then reads a stored file with a
    numeric missing marker of -999.
    """
    stream = BytesIO()
    record_dtype = [("double_miss", float), ("float_miss", np.float32),
                    ("string_miss", "a1")]
    original = np.array([(np.nan, np.inf, "")], dtype=record_dtype)

    # write to the buffer, then rewind so it can be read from the start
    StataWriter(stream, original).write_file()
    stream.seek(0)

    roundtripped = genfromdta(stream, missing_flt=np.nan)
    assert_(isnull(roundtripped[0][0]))
    assert_(isnull(roundtripped[0][1]))
    assert_(roundtripped[0][2] == asbytes(""))

    # the first five fields of the stored file's first row must all be
    # reported as the requested missing marker
    stored_path = os.path.join(curdir, "results/data_missing.dta")
    stored = genfromdta(stored_path, missing_flt=-999)
    assert_(np.all([stored[0][i] == -999 for i in range(5)]))
Example #9
0
 def _write(self, to_write):
     """
     Write `to_write` to the underlying file, converting it with asbytes
     first (Python 3 compatibility).
     """
     write = self._file.write
     write(asbytes(to_write))
Example #10
0
3 26.1
3 28.3
3 24.3
3 26.2
3 27.8'''

# SAS comparison table: pair, mean difference, lower, upper, significance
ss5 = '''\
2 - 3\t4.340\t0.691\t7.989\t***
2 - 1\t4.600\t0.951\t8.249\t***
3 - 2\t-4.340\t-7.989\t-0.691\t***
3 - 1\t0.260\t-3.389\t3.909\t-
1 - 2\t-4.600\t-8.249\t-0.951\t***
1 - 3\t-0.260\t-3.909\t3.389\t'''

# recfromtxt requires bytes on python 3.2, so convert every table up front
ss, ss2, ss3, ss5 = (asbytes(table) for table in (ss, ss2, ss3, ss5))

# parse each table into a record array with named columns
dta = np.recfromtxt(
    BytesIO(ss),
    names=("Rust", "Brand", "Replication"),
)
dta2 = np.recfromtxt(
    BytesIO(ss2),
    names=("idx", "Treatment", "StressReduction"),
)
dta3 = np.recfromtxt(
    BytesIO(ss3),
    names=("Brand", "Relief"),
)
dta5 = np.recfromtxt(
    BytesIO(ss5),
    names=('pair', 'mean', 'lower', 'upper', 'sig'),
    delimiter='\t',
)

# pick rows 1, 3 and 2 of the SAS table, in that order
sas_ = dta5[[1, 3, 2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
Example #11
0
3 26.1
3 28.3
3 24.3
3 26.2
3 27.8'''

ss5 = '''\
2 - 3	4.340	0.691	7.989	***
2 - 1	4.600	0.951	8.249	***
3 - 2	-4.340	-7.989	-0.691	***
3 - 1	0.260	-3.389	3.909	 -
1 - 2	-4.600	-8.249	-0.951	***
1 - 3	-0.260	-3.909	3.389	'''

# recfromtxt requires bytes on python 3.2: convert all tables before parsing
ss, ss2, ss3, ss5 = (asbytes(text) for text in (ss, ss2, ss3, ss5))

# load every table as a record array with named columns
dta = np.recfromtxt(
    BytesIO(ss),
    names=("Rust", "Brand", "Replication"),
)
dta2 = np.recfromtxt(
    BytesIO(ss2),
    names=("idx", "Treatment", "StressReduction"),
)
dta3 = np.recfromtxt(
    BytesIO(ss3),
    names=("Brand", "Relief"),
)
dta5 = np.recfromtxt(
    BytesIO(ss5),
    names=('pair', 'mean', 'lower', 'upper', 'sig'),
    delimiter='\t',
)

# select rows 1, 3 and 2 of the SAS table, in that order
sas_ = dta5[[1, 3, 2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
#import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])
Example #12
0
 def _write(self, to_write):
     """
     Pass `to_write` through asbytes and write the result to the underlying
     file (needed for Python 3 compatibility).
     """
     emit = self._file.write
     emit(asbytes(to_write))