Example #1
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse a data file from Luetkepohl (2005) book.

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Path to a JMulTi-format data file: a comment block terminated by
        ``*/``, a ``<... Q1>``-style marker line giving year, frequency
        letter and start period, a header row of column names, then
        whitespace-separated numeric rows.

    Returns
    -------
    data : numpy.ndarray
        Structured array with one named field per column.
    date_range : pandas.DatetimeIndex
        Business-period dates, one per row of ``data``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import numpy as np
    import pandas as pd
    from pandas.tseries.offsets import BMonthEnd, BQuarterEnd, BYearEnd

    # Marker line like b'<1975 Q1>': (year)(space)(freq letter)(start period).
    # Raw bytes literal: the file is read in binary mode.
    regex = re.compile(rb'<(.*) (\w)([\d]+)>.*')
    with open(path, 'rb') as f:
        lines = deque(f)

    # Skip the leading comment block, which ends with '*/'.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    # Find the marker line; everything up to and including it is header
    # material for genfromtxt below.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=to_skip + 1)

    n = len(data)

    # Generate the corresponding date range (using pandas for now).
    start_point = int(start_point)
    year = int(year)

    # The regex groups are bytes, so key the table on bytes codes.
    offsets = {
        b'Q': BQuarterEnd(),
        b'M': BMonthEnd(),
        b'A': BYearEnd(),
    }
    offset = offsets[freq]

    # Advance from the first on-offset date of `year` to the start period.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    # pd.date_range replaces the DatetimeIndex(start=..., freq=..., periods=...)
    # constructor form, which was removed from pandas.
    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
Example #2
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse a data file from Luetkepohl (2005) book.

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Path to a JMulTi-format data file: a comment block terminated by
        ``*/``, a ``<... Q1>``-style marker line, a header row of column
        names, then whitespace-separated numeric rows.

    Returns
    -------
    data : numpy.ndarray
        Structured array with one named field per column.
    date_range : pandas.DatetimeIndex
        Business-period dates, one per row of ``data``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import numpy as np
    import pandas as pd
    from pandas.tseries.offsets import BMonthEnd, BQuarterEnd, BYearEnd

    # Marker line like b'<1975 Q1>': (year)(space)(freq letter)(start period).
    # Raw bytes literal: the file is read in binary mode.
    regex = re.compile(rb'<(.*) (\w)([\d]+)>.*')
    with open(path, 'rb') as f:
        lines = deque(f)

    # Skip the leading comment block, which ends with '*/'.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    # Find the marker line; everything up to and including it is header
    # material for genfromtxt below.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=to_skip + 1)

    n = len(data)

    # Generate the corresponding date range (using pandas for now).
    start_point = int(start_point)
    year = int(year)

    # The regex groups are bytes, so key the table on bytes codes.
    offsets = {
        b'Q': BQuarterEnd(),
        b'M': BMonthEnd(),
        b'A': BYearEnd(),
    }
    offset = offsets[freq]

    # Advance from the first on-offset date of `year` to the start period.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    # pd.date_range replaces the DatetimeIndex(start=..., freq=..., periods=...)
    # constructor form, which was removed from pandas.
    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
Example #3
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse a data file from Luetkepohl (2005) book.

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Path to a JMulTi-format data file: a comment block terminated by
        ``*/``, a ``<... Q1>``-style marker line, a header row of column
        names, then whitespace-separated numeric rows.

    Returns
    -------
    data : numpy.recarray
        The numeric table, one named field per column.
    date_range : pandas.DatetimeIndex
        Business-period dates, one per row of ``data``.
    """
    from collections import deque
    from datetime import datetime
    import re

    import pandas as pd
    from pandas.tseries.offsets import BMonthEnd, BQuarterEnd, BYearEnd

    # Marker line like b'<1975 Q1>': (year)(space)(freq letter)(start period).
    regex = re.compile(rb'<(.*) (\w)([\d]+)>.*')
    with open(path, 'rb') as f:
        lines = deque(f)

    # Skip the leading comment block, which ends with '*/'.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    # Find the marker line; its 0-based index (to_skip + 1) is handed to
    # read_csv as the header row that follows it.
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = (pd.read_csv(path, delimiter=r"\s+",
                        header=to_skip + 1).to_records(index=False))

    n = len(data)

    # Generate the corresponding date range (using pandas for now).
    start_point = int(start_point)
    year = int(year)

    # The regex groups are bytes, so key the table on bytes codes.
    offsets = {
        b'Q': BQuarterEnd(),
        b'M': BMonthEnd(),
        b'A': BYearEnd(),
    }
    offset = offsets[freq]

    # Advance from the first on-offset date of `year` to the start period.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
 def _null_terminate(self, s, encoding):
     """Strip NUL padding from the fixed-width field *s*.

     On Python 3 the input is ``bytes`` and the trimmed value is decoded
     with *encoding*; on Python 2 the trimmed byte string is returned
     as-is.  A field with no NUL byte is returned unchanged (modulo
     decoding).
     """
     null_byte = b'\x00'
     if PY3:  # have bytes not strings, so must decode
         try:
             # NOTE(review): index() is taken on the original s, not the
             # lstripped copy -- presumably fields are only right-padded;
             # verify against the writer.
             s = s.lstrip(null_byte)[:s.index(null_byte)]
         except ValueError:  # no NUL terminator present
             pass
         return s.decode(encoding)
     else:
         try:
             return s.lstrip(null_byte)[:s.index(null_byte)]
         except ValueError:  # no NUL terminator present
             return s
Example #5
0
 def _null_terminate(self, s, encoding):
     """Strip NUL padding from the fixed-width field *s*.

     On Python 3 the input is ``bytes`` and the trimmed value is decoded
     with *encoding*; on Python 2 the trimmed byte string is returned
     as-is.  A field with no NUL byte is returned unchanged (modulo
     decoding).
     """
     null_byte = b'\x00'
     if PY3:  # have bytes not strings, so must decode
         try:
             # NOTE(review): index() is taken on the original s, not the
             # lstripped copy -- presumably fields are only right-padded;
             # verify against the writer.
             s = s.lstrip(null_byte)[:s.index(null_byte)]
         except ValueError:  # no NUL terminator present
             pass
         return s.decode(encoding)
     else:
         try:
             return s.lstrip(null_byte)[:s.index(null_byte)]
         except ValueError:  # no NUL terminator present
             return s
Example #6
0
 def _null_terminate(self, s, encoding):
     null_byte = asbytes('\x00')
     try:
         s = s.lstrip(null_byte)[:s.index(null_byte)]
     except Exception:
         pass
     return s.decode(encoding)
 def setup_class(cls):
     """Set up the SAS comparison case: Tukey HSD on the Relief/Brand data.

     Expected results (mean differences, confidence intervals and
     significance flags) are transcribed from SAS output held in ``sas_``.
     """
     cls.endog = dta3['Relief']
     cls.groups = dta3['Brand']
     cls.alpha = 0.05
     cls.setup_class_()

     cls.meandiff2 = sas_['mean']
     cls.confint2 = sas_[['lower', 'upper']].astype(float).values.reshape((3, 2))
     cls.reject2 = sas_['sig'] == b'***'
Example #8
0
 def setup_class(cls):
     """Set up the SAS comparison case: Tukey HSD on the Relief/Brand data.

     Expected results (mean differences, confidence intervals and
     significance flags) are transcribed from SAS output held in ``sas_``.
     """
     cls.endog = dta3['Relief']
     cls.groups = dta3['Brand']
     cls.alpha = 0.05
     cls.setup_class_()

     cls.meandiff2 = sas_['mean']
     cls.confint2 = sas_[['lower', 'upper']].astype(float).values.reshape((3, 2))
     cls.reject2 = sas_['sig'] == b'***'
    def setup_class(self):
        """Set up the SAS comparison case: Tukey HSD on the Relief/Brand data.

        Expected results (mean differences, confidence intervals and
        significance flags) are transcribed from SAS output held in ``sas_``.
        """
        self.endog = dta3['Relief']
        self.groups = dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()

        self.meandiff2 = sas_['mean']
        # NOTE(review): multi-field .view(float) on structured arrays is
        # restricted in numpy >= 1.16; may need
        # numpy.lib.recfunctions.structured_to_unstructured instead.
        self.confint2 = sas_[['lower', 'upper']].view(float).reshape((3, 2))
        self.reject2 = sas_['sig'] == b'***'
Example #10
0
    def setup_class(self):
        """Set up the SAS comparison case: Tukey HSD on the Relief/Brand data.

        Expected results (mean differences, confidence intervals and
        significance flags) are transcribed from SAS output held in ``sas_``.
        """
        self.endog = dta3['Relief']
        self.groups = dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()

        self.meandiff2 = sas_['mean']
        # NOTE(review): multi-field .view(float) on structured arrays is
        # restricted in numpy >= 1.16; may need
        # numpy.lib.recfunctions.structured_to_unstructured instead.
        self.confint2 = sas_[['lower', 'upper']].view(float).reshape((3, 2))
        self.reject2 = sas_['sig'] == b'***'
Example #11
0
def test_missing_roundtrip():
    """Round-trip a record with double/float/string missing values."""
    dtype = [("double_miss", float), ("float_miss", np.float32),
             ("string_miss", "a1")]
    dta = np.array([(np.nan, np.inf, "")], dtype=dtype)

    buf = BytesIO()
    StataWriter(buf, dta).write_file()
    buf.seek(0)

    # Missing values should come back as NaN / empty string.
    result = genfromdta(buf, missing_flt=np.nan)
    row = result[0]
    assert_(isnull(row[0]))
    assert_(isnull(row[1]))
    assert_(row[2] == asbytes(""))

    # Reading with an explicit fill value replaces missings with -999.
    fname = os.path.join(curdir, "results/data_missing.dta")
    filled = genfromdta(fname, missing_flt=-999)
    assert_(np.all([filled[0][i] == -999 for i in range(5)]))
def test_missing_roundtrip():
    """Round-trip a record with double/float/string missing values."""
    record = (np.nan, np.inf, "")
    fields = [("double_miss", float),
              ("float_miss", np.float32),
              ("string_miss", "a1")]
    dta = np.array([record], dtype=fields)

    buf = BytesIO()
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)

    # Missing values should come back as NaN / empty string.
    roundtripped = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(roundtripped[0][0]))
    assert_(isnull(roundtripped[0][1]))
    assert_(roundtripped[0][2] == asbytes(""))

    # Reading with an explicit fill value replaces missings with -999.
    path = os.path.join(curdir, "results/data_missing.dta")
    filled = genfromdta(path, missing_flt=-999)
    assert_(np.all([filled[0][i] == -999 for i in range(5)]))
 def _write(self, to_write):
     """
     Write ``to_write`` to the underlying file, converting it to bytes
     first for Python 3 compatibility.
     """
     encoded = asbytes(to_write)
     self._file.write(encoded)
Example #14
0
# Country-of-origin label for each observation in the cylinders data
# (presumably the classic auto-mpg dataset -- TODO confirm against the tests).
cyl_labels = np.array([
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'Japan', 'USA', 'USA', 'USA', 'Japan', 'Germany', 'France', 'Germany',
    'Sweden', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany', 'USA',
    'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany',
    'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France', 'Japan', 'Germany',
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan',
    'Japan', 'Japan', 'Japan', 'Japan', 'USA', 'USA', 'USA', 'USA', 'Japan',
    'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA'
])

#accommodate recfromtxt for python 3.2, requires bytes
# (asbytes encodes the raw table strings so they can be parsed as bytes)
ss = asbytes(ss)
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)

# Parse the embedded whitespace/tab-separated tables into DataFrames and
# attach column names; the label columns are byte-encoded to match the
# byte-string comparisons used in the tests below.
dta = pd.read_csv(BytesIO(ss), sep=r'\s+', header=None, engine='python')
dta.columns = "Rust", "Brand", "Replication"
dta2 = pd.read_csv(BytesIO(ss2), sep=r'\s+', header=None, engine='python')
dta2.columns = "idx", "Treatment", "StressReduction"
dta2["Treatment"] = dta2["Treatment"].map(lambda v: v.encode('utf-8'))
dta3 = pd.read_csv(BytesIO(ss3), sep=r'\s+', header=None, engine='python')
dta3.columns = ["Brand", "Relief"]
dta5 = pd.read_csv(BytesIO(ss5), sep=r'\t', header=None, engine='python')
dta5.columns = ['pair', 'mean', 'lower', 'upper', 'sig']
for col in ('pair', 'sig'):
    dta5[col] = dta5[col].map(lambda v: v.encode('utf-8'))
# Engine cylinder counts, one per observation (parallel to cyl_labels).
cylinders = np.array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, 4,
                    4, 4, 4, 4, 6, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 6, 6, 6, 6, 4, 4, 4, 4, 6, 6,
                    6, 6, 4, 4, 4, 4, 4, 8, 4, 6, 6, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
                    4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4])
# Country-of-origin label for each observation (parallel to cylinders).
cyl_labels = np.array(['USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'France',
    'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Japan',
    'Germany', 'France', 'Germany', 'Sweden', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany',
    'USA', 'USA', 'France', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'Germany',
    'Japan', 'USA', 'USA', 'USA', 'USA', 'Germany', 'Japan', 'Japan', 'USA', 'Sweden', 'USA', 'France',
    'Japan', 'Germany', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA', 'USA',
    'Germany', 'Japan', 'Japan', 'USA', 'USA', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'Japan', 'USA',
    'USA', 'USA', 'USA', 'Japan', 'USA', 'USA', 'USA', 'Germany', 'USA', 'USA', 'USA'])

#accommodate recfromtxt for python 3.2, requires bytes
# (asbytes encodes the raw table strings so recfromtxt can parse them)
ss = asbytes(ss)
ss2 = asbytes(ss2)
ss3 = asbytes(ss3)
ss5 = asbytes(ss5)

# Parse the embedded raw tables into numpy record arrays with named fields.
dta = np.recfromtxt(BytesIO(ss), names=("Rust","Brand","Replication"))
dta2 = np.recfromtxt(BytesIO(ss2), names = ("idx", "Treatment", "StressReduction"))
dta3 = np.recfromtxt(BytesIO(ss3), names = ("Brand", "Relief"))
dta5 = np.recfromtxt(BytesIO(ss5), names = ('pair', 'mean', 'lower', 'upper', 'sig'), delimiter='\t')
# NOTE(review): this selects *rows* 1, 3, 2 of the SAS results table --
# presumably to reorder the pairwise comparisons; verify against the tests.
sas_ = dta5[[1,3,2]]

from statsmodels.stats.multicomp import (tukeyhsd, pairwise_tukeyhsd,
                                         MultiComparison)
#import statsmodels.sandbox.stats.multicomp as multi
#print tukeyhsd(dta['Brand'], dta['Rust'])
Example #16
0
 def _write(self, to_write):
     """Write *to_write* to the attached file as bytes (Py3 compat)."""
     payload = asbytes(to_write)
     self._file.write(payload)