Example 1
def extract():
    fields = [
        'Date', 'Total Equity', 'Domestic Equity', 'World Equity', 'Hybrid',
        'Total Bond', 'Taxable Bond', 'Municipal Bond', 'Total'
    ]
    fin = open(archive, 'rb')
    records, metadata = xls.parse(fin)
    records = [[r[f['id']] for f in metadata['fields']] for r in records]
    # discard first 2 rows as blank / table title
    records = records[2:]

    for r in records:
        if isinstance(r[0], datetime.datetime):
            # get rid of time part
            r[0] = r[0].isoformat()[:10]
        for idx in range(1, len(r)):
            if isinstance(r[idx], float):
                r[idx] = int(r[idx])

    # split out weekly from monthly
    # somewhere down the sheet there is a blank line, then a heading for the
    # weekly data
    for count, r in enumerate(records):
        if r[0] == 'Estimated Weekly Net New Cash Flow':
            weekly = records[count + 1:count + 7]
            # count - 1 because of the blank line above the heading
            monthly = records[:count - 1]

    # for the monthly data we just overwrite: the source appends new rows,
    # so the earliest data is still there
    fo = open('data/monthly.csv', 'w')
    writer = csv.writer(fo, lineterminator='\n')
    writer.writerow(fields)
    writer.writerows(monthly)

    # now do the weekly data - have to merge with existing data as they only
    # give us the latest few weeks
    weeklyfp = 'data/weekly.csv'
    existing = []
    if os.path.exists(weeklyfp):
        existing = csv.reader(open(weeklyfp))
        next(existing)  # skip the header row
        existing = list(existing)
    # combine existing and weekly
    # the new weekly rows should be more recent, so the algorithm is:
    # walk through the existing rows until we hit one whose date equals the
    # oldest new row, prepend everything before it to weekly, and break
    oldest_new_week = list(weekly[0])
    overlap = False
    for count, row in enumerate(existing):
        if row[0] == oldest_new_week[0]:
            overlap = True
            weekly = existing[:count] + weekly
            break
    # default weekly to everything in case we do not have an overlap
    if not overlap:
        weekly = existing + weekly

    fo = open('data/weekly.csv', 'w')
    writer = csv.writer(fo, lineterminator='\n')
    writer.writerow(fields)
    writer.writerows(weekly)
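The overlap merge above is easiest to see on toy data. A standalone sketch of the same logic (the dates and values are made up for illustration):

# standalone sketch of the overlap merge above, on made-up rows
existing = [['2024-01-05', 1], ['2024-01-12', 2], ['2024-01-19', 3]]
weekly = [['2024-01-12', 20], ['2024-01-19', 30], ['2024-01-26', 40]]
oldest_new_week = list(weekly[0])
overlap = False
for count, row in enumerate(existing):
    if row[0] == oldest_new_week[0]:
        overlap = True
        # keep the existing rows older than the overlap, then the new rows
        weekly = existing[:count] + weekly
        break
if not overlap:
    weekly = existing + weekly
# weekly == [['2024-01-05', 1], ['2024-01-12', 20],
#            ['2024-01-19', 30], ['2024-01-26', 40]]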
Example 2
def extract(fp=cachepath):
    rows, metadata = xls.parse(open(fp, 'rb'))
    # convert from iterator to list and rows of dictionaries to rows of lists
    rows = [[row[f['id']] for f in metadata['fields']] for row in rows]

    # the headings are spread across rows 2-8 of the sheet, so set them here
    header = [
        'Date', 'SP500', 'Dividend', 'Earnings', 'Consumer Price Index',
        'Long Interest Rate', 'Real Price', 'Real Dividend', 'Real Earnings',
        'PE10'
    ]

    # first row is the header, last row is footnotes
    rows = rows[1:-1]
    # usually there is just one footnote row at the end, but some months the
    # sheet goes wrong: there are extra rows and then something random, e.g.
    # in April 2016 there is a stray number 20+ rows down
    rows = [r for r in rows if r[0] != '']
    # list() so that columns can be deleted below (zip returns an iterator
    # on Python 3)
    transposed = list(zip(*rows))
    # delete the "date fraction" column
    del transposed[5]
    # there seem to be six random extra blank columns at the end
    del transposed[-6:]
    # fix dates
    transposed[0] = [_fixdates(val) for val in transposed[0]]
    for idx, row in enumerate(transposed[1:]):
        row = [_fixup(val) for val in row]
        transposed[idx + 1] = row

    data = zip(*transposed)
    fout = open(out_filepath, 'w')
    writer = csv.writer(fout, lineterminator='\n')
    writer.writerow(header)
    writer.writerows(data)
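The _fixdates and _fixup helpers are referenced but never shown in this listing. A minimal sketch of what they might look like, assuming the Shiller-style sheet stores dates as year.month floats (2016.01 for January, 2016.1 for October) and that numeric cells only need light normalization; the real helpers may differ:

def _fixdates(val):
    # assumption: dates arrive as year.month floats, e.g. 2016.01 for
    # January 2016; 2016.1 rounds out to month 10 (October)
    year = int(val)
    month = int(round((val - year) * 100))
    return '%d-%02d-01' % (year, month)

def _fixup(val):
    # assumption: blank cells stay blank, numeric cells get rounded
    if val in ('', None):
        return ''
    return round(float(val), 2)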
Example 3
def extract(fp=cachepath):
    rows, metadata = xls.parse(open(fp, 'rb'))
    # convert from iterator to list and rows of dictionaries to rows of lists
    rows = [[row[f['id']] for f in metadata['fields']] for row in rows]

    # the headings are spread across rows 2-8 of the sheet, so set them here
    header = [
        'Date',
        'SP500',
        'Dividend',
        'Earnings',
        'Consumer Price Index',
        'Long Interest Rate',
        'Real Price',
        'Real Dividend',
        'Real Earnings',
        'P/E10'
        ]

    # first row is the header, last row is footnotes
    data = rows[1:-1]
    transposed = list(zip(*data))
    # delete the "date fraction" column
    del transposed[5]
    # fix dates
    transposed[0] = [_fixdates(val) for val in transposed[0]]
    for idx, row in enumerate(transposed[1:]):
        row = [_fixup(val) for val in row]
        transposed[idx + 1] = row

    data = zip(*transposed)
    fout = open(out_filepath, 'w')
    writer = csv.writer(fout, lineterminator='\n')
    writer.writerow(header)
    writer.writerows(data)
Example 4
def extract(fp=CACHE_PATH):
    rows, metadata = xls.parse(open(fp, 'rb'))
    # convert from iterator to list and rows of dictionaries to rows of lists
    rows = [[row[f['id']] for f in metadata['fields']] for row in rows]

    # the headings are spread across rows 2-8 of the sheet, so set them here
    header = [
        'Date', 'SP500', 'Dividend', 'Earnings', 'Consumer Price Index',
        'Long Interest Rate', 'Real Price', 'Real Dividend', 'Real Earnings',
        'PE10'
    ]

    # drop the header row, footnotes and any other invalid rows
    data = filter(is_valid_raw, rows)
    transposed = list(zip(*data))
    # remove the empty columns at the end
    del transposed[11:]
    # delete the "date fraction" column
    del transposed[5]
    # fix dates
    transposed[0] = [_fixdates(val) for val in transposed[0]]
    for idx, row in enumerate(transposed[1:]):
        row = [_fixup(val) for val in row]
        transposed[idx + 1] = row

    data = zip(*transposed)
    fout = open(OUT_FILEPATH, 'w')
    writer = csv.writer(fout, lineterminator='\n')
    writer.writerow(header)
    writer.writerows(data)
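is_valid_raw is also not defined anywhere in this listing. A plausible stand-in, assuming a data row is one whose date cell parsed as a number (the header and footnote rows start with text or blanks):

def is_valid_raw(row):
    # hypothetical predicate: real data rows start with a numeric
    # year.month date; header and footnote rows do not
    return isinstance(row[0], float) and row[0] > 0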
Example 5
def extract(url):
    fo = urllib.request.urlopen(url)  # urllib.request on Python 3
    records, metadata = xls.parse(fo)

    def rerowify(dict_):
        return [dict_[f['id']] for f in metadata['fields']]

    rows = [rerowify(r) for r in records]
    # drop the first eight rows (sheet headings)
    del rows[:8]
    # list() so the column slices below work on Python 3
    transposed = list(zip(*rows))
    annual = zip(*transposed[:3])
    annual = [[int(r[0])] + list(r[1:]) for r in annual if r[0]]
    quarterly = zip(*transposed[4:7])
    # quarter labels look like '1947Q1' etc.
    def fixquarters(date):
        mapping = [
            ['Q1', '-01-01'],
            ['Q2', '-04-01'],
            ['Q3', '-07-01'],
            ['Q4', '-10-01']
        ]
        for x in mapping:
            date = date.replace(x[0], x[1])
        return str(date)
    quarterly = [[fixquarters(r[0])] + list(r[1:]) for r in quarterly]
    return (annual, quarterly)
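A hedged usage sketch of this extract (the URL is a placeholder, not from the source):

# hypothetical call; the real URL comes from the surrounding module
annual, quarterly = extract('http://example.com/gdp.xls')
print(annual[0])      # e.g. [1947, ...]
print(quarterly[0])   # e.g. ['1947-01-01', ...]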
Example 6
 def test_2_header_type(self):
     """Test guessing header type"""
     xlsfo = open(os.path.join(self.testdata_path, 'simple.xls'), 'rb')
     iterator, metadata = xls.parse(xlsfo)
     assert_equal([{
         'id': u'date',
         'type': 'DateTime'
     }, {
         'id': u'temperature',
         'type': 'Integer'
     }, {
         'id': u'place',
         'type': 'String'
     }], metadata['fields'])
Example 7
 def test_1_convert_xls(self):
     """Test converting a XLS to JSON"""
     xlsfo = open(os.path.join(self.testdata_path, 'simple.xls'))
     iterator, metadata = xls.parse(xlsfo, guess_types=False)
     assert_equal([{
         "id": u"date"
     }, {
         "id": u"temperature"
     }, {
         "id": u"place"
     }], metadata['fields'])
     content = [row for row in iterator]
     assert ({
         u'date': datetime.datetime(2011, 1, 1, 0, 0),
         u'place': u'Galway',
         u'temperature': 1.0
     } in content)
Example 8
 def test_3_convert_xlsx(self):
     """Test converting a XLSX to JSON"""
     xlsfo = open(os.path.join(self.testdata_path, 'simple.xlsx'))
     iterator, metadata = xls.parse(xlsfo, excel_type='xlsx')
     assert_equal([{
         'type': 'DateTime',
         'id': u'date'
     }, {
         'id': u'temperature',
         'type': 'Integer'
     }, {
         'id': u'place',
         'type': 'String'
     }], metadata['fields'])
     content = [row for row in iterator]
     assert ({
         u'date': datetime.datetime(2011, 1, 1, 0, 0),
         u'place': u'Galway',
         u'temperature': 1
     } in content)
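These tests appear to use a nose-style assert_equal. If that import is missing when running them standalone, a minimal stand-in (an assumption, not part of the original suite) would be:

def assert_equal(a, b):
    # minimal stand-in for nose.tools.assert_equal
    assert a == b, '%r != %r' % (a, b)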
Example 9
def parse(*args, **kwargs):
    # thin wrapper: force the excel_type to 'xlsx' and delegate to xls.parse
    kwargs['excel_type'] = 'xlsx'
    return xls.parse(*args, **kwargs)
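A minimal usage sketch of this wrapper (the file name is a placeholder, not from the source):

# hypothetical usage; 'data.xlsx' is a placeholder path
with open('data.xlsx', 'rb') as fo:
    rows, metadata = parse(fo)
    for field in metadata['fields']:
        print(field['id'], field.get('type'))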