Esempio n. 1
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file. The parser expects, in order: leading
        lines up to and including one that contains ``*/``, a line that
        matches ``<YEAR FP>`` (for example ``<1960 Q1>``, where ``F`` is one
        of ``Q``, ``M`` or ``A`` and ``P`` is the 1-based starting period
        within the year), a line of column names, and the data rows.

    Returns
    -------
    data : ndarray
        Structured array read by ``np.genfromtxt`` with one field per column
        name; always at least one-dimensional.
    date_range : DatetimeIndex
        Business period-end dates, one per row of ``data``, starting at the
        period given in the ``<YEAR FP>`` line.

    Raises
    ------
    IndexError
        If the file ends before the ``*/`` line or the ``<YEAR FP>`` line
        has been found.
    KeyError
        If the frequency letter is not ``Q``, ``M`` or ``A``.
    """
    from collections import deque
    from datetime import datetime
    import re

    # date_range replaces the DatetimeIndex(start=..., periods=...)
    # constructor form, which pandas no longer accepts.
    from pandas import date_range as make_date_range

    # The file is read in binary mode, so the pattern is a bytes pattern.
    regex = re.compile(br'<(.*) (\w)([\d]+)>.*')
    with open(path, 'rb') as f:
        lines = deque(f)

    # Count the lines that precede the '*/' line. The '*/' line itself is
    # consumed here but not counted; the "+ 1" below accounts for it.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    # Consume (and count) lines up to and including the '<YEAR FP>' line.
    while True:
        to_skip += 1
        m = regex.match(lines.popleft())
        if m:
            year, freq, start_point = m.groups()
            break

    # names=True takes the first unskipped line as the column names.
    # atleast_1d keeps len() working when the file has a single data row,
    # for which genfromtxt returns a 0-d array.
    data = np.atleast_1d(
        np.genfromtxt(path, names=True, skip_header=to_skip + 1))

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because ``freq`` comes from a bytes regex match.
    offsets = {
        b'Q': frequencies.BQuarterEnd(),
        b'M': frequencies.BMonthEnd(),
        b'A': frequencies.BYearEnd(),
    }
    offset = offsets[freq]

    # First period end on or after January 1st of ``year``, then advanced by
    # (start_point - 1) further periods.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    date_range = make_date_range(start=start_date, freq=offset, periods=n)

    return data, date_range
Esempio n. 2
0
def parse_lutkepohl_data(path):  # pragma: no cover
    """
    Parse data files from Lütkepohl (2005) book

    Source for data files: www.jmulti.de

    Parameters
    ----------
    path : str
        Location of the data file. The parser expects, in order: leading
        lines up to and including one that contains ``*/``, a line that
        matches ``<YEAR FP>`` (for example ``<1960 Q1>``, where ``F`` is one
        of ``Q``, ``M`` or ``A`` and ``P`` is the 1-based starting period
        within the year), a line of whitespace-separated column names, and
        the whitespace-separated data rows.

    Returns
    -------
    data : recarray
        Record array with one field per column name, obtained from
        ``DataFrame.to_records(index=False)``.
    date_range : DatetimeIndex
        Business period-end dates, one per row of ``data``, starting at the
        period given in the ``<YEAR FP>`` line.

    Raises
    ------
    IndexError
        If the file ends before the ``*/`` line or the ``<YEAR FP>`` line
        has been found.
    KeyError
        If the frequency letter is not ``Q``, ``M`` or ``A``.
    """

    from collections import deque
    from datetime import datetime
    import re

    # The file is read in binary mode, so the pattern is a bytes pattern.
    regex = re.compile(br'<(.*) (\w)([\d]+)>.*')
    with open(path, 'rb') as f:
        lines = deque(f)

    # Count the lines that precede the '*/' line. The '*/' line itself is
    # consumed here but not counted; the "+ 1" below accounts for it.
    to_skip = 0
    while b'*/' not in lines.popleft():
        to_skip += 1

    # Consume (and count) lines up to and including the '<YEAR FP>' line.
    while True:
        to_skip += 1
        m = regex.match(lines.popleft())
        if m:
            year, freq, start_point = m.groups()
            break

    # ``to_skip + 1`` is a count of physical lines. ``skiprows`` counts
    # physical lines, whereas a non-zero ``header`` index ignores blank
    # lines and would select the wrong row if the preamble contains any.
    frame = pd.read_csv(path, delimiter=r"\s+",
                        skiprows=to_skip + 1, header=0)
    data = frame.to_records(index=False)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    # Keys are bytes because ``freq`` comes from a bytes regex match.
    offsets = {
        b'Q': frequencies.BQuarterEnd(),
        b'M': frequencies.BMonthEnd(),
        b'A': frequencies.BYearEnd(),
    }
    offset = offsets[freq]

    # First period end on or after January 1st of ``year``, then advanced by
    # (start_point - 1) further periods.
    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    date_range = pd.date_range(start=start_date, freq=offset, periods=n)

    return data, date_range