Esempio n. 1
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de
    """

    from statsmodels.compat.pandas import datetools as dt

    from collections import deque
    from datetime import datetime
    import pandas
    import re

    regex = re.compile(asbytes('<(.*) (\w)([\d]+)>.*'))
    with open(path, 'rb') as f:
        lines = deque(f)

    to_skip = 0
    while asbytes('*/') not in lines.popleft():
        #while '*/' not in lines.popleft():
        to_skip += 1

    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=to_skip + 1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q'): frequencies.BQuarterEnd(),
        asbytes('M'): frequencies.BMonthEnd(),
        asbytes('A'): frequencies.BYearEnd()
    }

    # create an instance
    offset = offsets[freq]

    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    offset = offsets[freq]
    from pandas import DatetimeIndex  # pylint: disable=E0611
    date_range = DatetimeIndex(start=start_date, freq=offset, periods=n)

    return data, date_range
Esempio n. 2
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lütkepohl (2005) book

    Source for data files: www.jmulti.de
    """

    from collections import deque
    from datetime import datetime
    import re

    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))
    with open(path, 'rb') as f:
        lines = deque(f)

    to_skip = 0
    while asbytes('*/') not in lines.popleft():
        #while '*/' not in lines.popleft():
        to_skip += 1

    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = (pd.read_csv(path, delimiter=r"\s+",
                        header=to_skip + 1).to_records(index=False))

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q'): frequencies.BQuarterEnd(),
        asbytes('M'): frequencies.BMonthEnd(),
        asbytes('A'): frequencies.BYearEnd()
    }

    # create an instance
    offset = offsets[freq]

    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    offset = offsets[freq]
    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
Esempio n. 3
0
        ax.vlines(xs, [0], acorr[:, i])

        ax.axhline(0, color='k')
        ax.set_ylim([-1, 1])

        # hack?
        ax.set_xlim([-1, xs[-1] + 1])

    mpl.rcParams['font.size'] = old_size

#Example TSA descriptive

data = sm.datasets.macrodata.load()
mdata = data.data
df = DataFrame.from_records(mdata)
quarter_end = frequencies.BQuarterEnd()
df.index = [quarter_end.rollforward(datetime(int(y), int(q) * 3, 1))
for y, q in zip(df.pop('year'), df.pop('quarter'))]
logged = np.log(df.ix[:, ['m1', 'realgdp', 'cpi']])
logged.plot(subplots=True)

log_difference = logged.diff().dropna()
plot_acf_multiple(log_difference.values)

#Example TSA VAR

model = tsa.VAR(log_difference, freq='D')
print(model.select_order())

res = model.fit(2)
print(res.summary())